[ovs-dev] [RFC PATCH] xlate: Use dp_hash for select groups.

Jarno Rajahalme jarno at ovn.org
Tue Apr 19 00:42:53 UTC 2016


Add a new select group selection method "dp_hash", which uses minimal
number of bits from the datapath calculated packet hash to inform the
select group bucket selection.  This makes the datapath flows more
generic resulting in less upcalls to userspace, but adds recirculation
prior to group selection.

Signed-off-by: Jarno Rajahalme <jarno at ovn.org>
---
 lib/ofp-parse.c              |  2 ++
 lib/ofp-util.c               | 16 ++++------
 ofproto/ofproto-dpif-xlate.c | 74 ++++++++++++++++++++++++++++++++++++++++++--
 ofproto/ofproto-dpif.c       | 13 +++++---
 ofproto/ofproto-dpif.h       |  3 +-
 tests/ofproto-dpif.at        | 38 +++++++++++++++++++++++
 tests/ofproto.at             |  3 ++
 7 files changed, 131 insertions(+), 18 deletions(-)

diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 71c5cdf..ce3a9e4 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -1536,6 +1536,8 @@ parse_ofp_group_mod_str__(struct ofputil_group_mod *gm, uint16_t command,
         goto out;
     }
 
+    /* XXX Exclude fields for non "hash" selection method. */
+    
     if (fields & F_COMMAND_BUCKET_ID) {
         if (!(fields & F_COMMAND_BUCKET_ID_ALL || had_command_bucket_id)) {
             error = xstrdup("must specify a command bucket id");
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index bb1535d..68feabb 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -8697,28 +8697,24 @@ parse_group_prop_ntr_selection_method(struct ofpbuf *payload,
         return OFPERR_OFPBPC_BAD_VALUE;
     }
 
-    if (strcmp("hash", prop->selection_method)) {
+    if (strcmp("hash", prop->selection_method)
+        && strcmp("dp_hash", prop->selection_method)) {
         OFPPROP_LOG(&bad_ofmsg_rl, false,
                     "ntr selection method '%s' is not supported",
                     prop->selection_method);
         return OFPERR_OFPBPC_BAD_VALUE;
     }
+    /* 'method_len' is now non-zero. */
 
     strcpy(gp->selection_method, prop->selection_method);
     gp->selection_method_param = ntohll(prop->selection_method_param);
 
-    if (!method_len && gp->selection_method_param) {
-        OFPPROP_LOG(&bad_ofmsg_rl, false, "ntr selection method parameter is "
-                    "non-zero but selection method is empty");
-        return OFPERR_OFPBPC_BAD_VALUE;
-    }
-
     ofpbuf_pull(payload, sizeof *prop);
 
     fields_len = ntohs(prop->length) - sizeof *prop;
-    if (!method_len && fields_len) {
-        OFPPROP_LOG(&bad_ofmsg_rl, false, "ntr selection method parameter is "
-                    "zero but fields are provided");
+    if (fields_len && strcmp("hash", gp->selection_method)) {
+        OFPPROP_LOG(&bad_ofmsg_rl, false, "ntr selection method %s "
+                    "does not support fields", gp->selection_method);
         return OFPERR_OFPBPC_BAD_VALUE;
     }
 
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 5937913..4231e5f 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -317,6 +317,11 @@ struct xlate_ctx {
     * case at that point.
     */
     bool freezing;
+    bool recirc_update_dp_hash;    /* Generated recirculation will be preceded
+                                    * by datapath HASH action to get an updated
+                                    * dp_hash after recirculation. */
+    uint32_t dp_hash_alg;
+    uint32_t dp_hash_basis;
     struct ofpbuf frozen_actions;
     const struct ofpact_controller *pause;
 
@@ -373,6 +378,17 @@ ctx_trigger_freeze(struct xlate_ctx *ctx)
     ctx->freezing = true;
 }
 
+static void
+ctx_trigger_recirculate_with_hash(struct xlate_ctx *ctx, uint32_t type,
+                                  uint32_t basis)
+{
+    ctx->exit = true;
+    ctx->freezing = true;
+    ctx->recirc_update_dp_hash = true;
+    ctx->dp_hash_alg = type;
+    ctx->dp_hash_basis = basis;
+}
+
 static bool
 ctx_first_frozen_action(const struct xlate_ctx *ctx)
 {
@@ -384,6 +400,7 @@ ctx_cancel_freeze(struct xlate_ctx *ctx)
 {
     if (ctx->freezing) {
         ctx->freezing = false;
+        ctx->recirc_update_dp_hash = false;
         ofpbuf_clear(&ctx->frozen_actions);
         ctx->frozen_actions.header = NULL;
     }
@@ -1504,8 +1521,9 @@ group_first_live_bucket(const struct xlate_ctx *ctx,
 {
     struct ofputil_bucket *bucket;
     const struct ovs_list *buckets;
+    uint32_t n_buckets;
 
-    group_dpif_get_buckets(group, &buckets);
+    group_dpif_get_buckets(group, &buckets, &n_buckets);
     LIST_FOR_EACH (bucket, list_node, buckets) {
         if (bucket_is_alive(ctx, bucket, depth)) {
             return bucket;
@@ -1526,8 +1544,9 @@ group_best_live_bucket(const struct xlate_ctx *ctx,
 
     struct ofputil_bucket *bucket;
     const struct ovs_list *buckets;
+    uint32_t n_buckets;
 
-    group_dpif_get_buckets(group, &buckets);
+    group_dpif_get_buckets(group, &buckets, &n_buckets);
     LIST_FOR_EACH (bucket, list_node, buckets) {
         if (bucket_is_alive(ctx, bucket, 0)) {
             uint32_t score = (hash_int(i, basis) & 0xffff) * bucket->weight;
@@ -3362,8 +3381,9 @@ xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
 {
     struct ofputil_bucket *bucket;
     const struct ovs_list *buckets;
+    uint32_t n_buckets;
 
-    group_dpif_get_buckets(group, &buckets);
+    group_dpif_get_buckets(group, &buckets, &n_buckets);
 
     LIST_FOR_EACH (bucket, list_node, buckets) {
         xlate_group_bucket(ctx, bucket);
@@ -3469,6 +3489,41 @@ xlate_hash_fields_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
 }
 
 static void
+xlate_dp_hash_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
+{
+    struct ofputil_bucket *bucket;
+
+    /* dp_hash value 0 is special since it means that the dp_hash has not been
+     * computed, as all computed dp_hash values are non-zero.  Therefore
+     * compare to zero can be used to decide if the dp_hash value is valid
+     * without masking the dp_hash field. */
+    if (!ctx->xin->flow.dp_hash) {
+        uint64_t param = group_dpif_get_selection_method_param(group);
+
+        ctx_trigger_recirculate_with_hash(ctx, param >> 32, (uint32_t)param);
+    } else {
+        const struct ovs_list *buckets;
+        uint32_t n_buckets;
+
+        group_dpif_get_buckets(group, &buckets, &n_buckets);
+        if (n_buckets) {
+            /* Minimal mask to cover the number of buckets. */
+            uint32_t mask = (1 << log_2_ceil(n_buckets)) - 1;
+            /* Multiplier chosen to make the trivial 1 bit case to
+             * actually distribute amongst two equal weight buckets. */
+            uint32_t basis = 0xc2b73583 * (ctx->xin->flow.dp_hash & mask);
+
+            ctx->wc->masks.dp_hash |= mask;
+            bucket = group_best_live_bucket(ctx, group, basis);
+            if (bucket) {
+                xlate_group_bucket(ctx, bucket);
+                xlate_group_stats(ctx, group, bucket);
+            }
+        }
+    }
+}
+
+static void
 xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
 {
     const char *selection_method = group_dpif_get_selection_method(group);
@@ -3477,6 +3532,8 @@ xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
         xlate_default_select_group(ctx, group);
     } else if (!strcasecmp("hash", selection_method)) {
         xlate_hash_fields_select_group(ctx, group);
+    } else if (!strcasecmp("dp_hash", selection_method)) {
+        xlate_dp_hash_select_group(ctx, group);
     } else {
         /* Parsing of groups should ensure this never happens */
         OVS_NOT_REACHED();
@@ -3706,6 +3763,16 @@ finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
         }
         recirc_refs_add(&ctx->xout->recircs, id);
 
+        if (ctx->recirc_update_dp_hash) {
+            struct ovs_action_hash *act_hash;
+
+            /* Hash action. */
+            act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
+                                                OVS_ACTION_ATTR_HASH,
+                                                sizeof *act_hash);
+            act_hash->hash_alg = OVS_HASH_ALG_L4;  /* Make configurable. */
+            act_hash->hash_basis = 0;              /* Make configurable. */
+        }
         nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
     }
 
@@ -5105,6 +5172,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
         .mirrors = 0,
 
         .freezing = false,
+        .recirc_update_dp_hash = false,
         .frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
         .pause = NULL,
 
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 285e377..bb093e8 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -4235,11 +4235,12 @@ group_construct_stats(struct group_dpif *group)
 {
     struct ofputil_bucket *bucket;
     const struct ovs_list *buckets;
+    uint32_t n_buckets;
 
     group->packet_count = 0;
     group->byte_count = 0;
 
-    group_dpif_get_buckets(group, &buckets);
+    group_dpif_get_buckets(group, &buckets, &n_buckets);
     LIST_FOR_EACH (bucket, list_node, buckets) {
         bucket->stats.packet_count = 0;
         bucket->stats.byte_count = 0;
@@ -4259,8 +4260,9 @@ group_dpif_credit_stats(struct group_dpif *group,
         bucket->stats.byte_count += stats->n_bytes;
     } else { /* Credit to all buckets */
         const struct ovs_list *buckets;
+        uint32_t n_buckets;
 
-        group_dpif_get_buckets(group, &buckets);
+        group_dpif_get_buckets(group, &buckets, &n_buckets);
         LIST_FOR_EACH (bucket, list_node, buckets) {
             bucket->stats.packet_count += stats->n_packets;
             bucket->stats.byte_count += stats->n_bytes;
@@ -4304,13 +4306,14 @@ group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
     struct group_dpif *group = group_dpif_cast(group_);
     struct ofputil_bucket *bucket;
     const struct ovs_list *buckets;
+    uint32_t n_buckets;
     struct bucket_counter *bucket_stats;
 
     ovs_mutex_lock(&group->stats_mutex);
     ogs->packet_count = group->packet_count;
     ogs->byte_count = group->byte_count;
 
-    group_dpif_get_buckets(group, &buckets);
+    group_dpif_get_buckets(group, &buckets, &n_buckets);
     bucket_stats = ogs->bucket_stats;
     LIST_FOR_EACH (bucket, list_node, buckets) {
         bucket_stats->packet_count = bucket->stats.packet_count;
@@ -4341,9 +4344,11 @@ group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
 
 void
 group_dpif_get_buckets(const struct group_dpif *group,
-                       const struct ovs_list **buckets)
+                       const struct ovs_list **buckets,
+                       uint32_t *n_buckets)
 {
     *buckets = &group->up.buckets;
+    *n_buckets = group->up.n_buckets;
 }
 
 enum ofp11_group_type
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 91bf463..5caf2da 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -137,7 +137,8 @@ bool group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
                        struct group_dpif **group);
 
 void group_dpif_get_buckets(const struct group_dpif *group,
-                            const struct ovs_list **buckets);
+                            const struct ovs_list **buckets,
+                            uint32_t *n_buckets);
 enum ofp11_group_type group_dpif_get_type(const struct group_dpif *group);
 const char *group_dpif_get_selection_method(const struct group_dpif *group);
 uint64_t group_dpif_get_selection_method_param(const struct group_dpif *group);
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 9ac2e2a..c016b34 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -448,6 +448,44 @@ AT_CHECK([sort results | uniq], [0],
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([ofproto-dpif - select group with dp_hash selection method])
+OVS_VSWITCHD_START
+add_of_ports br0 1 10 11
+AT_CHECK([ovs-ofctl -O OpenFlow15 add-group br0 'group_id=1234,type=select,selection_method=dp_hash,bucket=output:10,bucket=output:11'])
+AT_CHECK([ovs-ofctl -O OpenFlow15 add-flow br0 'ip,nw_src=192.168.0.1 actions=group:1234'])
+
+# Try a bunch of different flows and make sure that they get distributed
+# at least somewhat.
+for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+    pkt="in_port(1),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:01),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.1.$d,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)"
+    AT_CHECK([ovs-appctl netdev-dummy/receive p1 $pkt])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/dp_hash(.*\/0x1)/dp_hash(0xXXXX\/0x1)/' | sed 's/packets.*actions:1/actions:1/' | strip_ufid | strip_used | sort], [0], [dnl
+flow-dump from non-dpdk interfaces:
+recirc_id(0),in_port(1),eth_type(0x0800),ipv4(src=192.168.0.1,frag=no), packets:15, bytes:900, used:0.0s, actions:hash(hash_l4(0)),recirc(0x1)
+recirc_id(0x1),dp_hash(0xXXXX/0x1),in_port(1),eth_type(0x0800),ipv4(frag=no), actions:10
+recirc_id(0x1),dp_hash(0xXXXX/0x1),in_port(1),eth_type(0x0800),ipv4(frag=no), actions:11
+])
+
+AT_CHECK([ovs-appctl revalidator/purge], [0])
+
+# Try a bunch of different flows and make sure that they are not distributed
+# as they only vary a field that is not hashed
+for d in 0 1 2 3 4 5 6 7 8 9 a b c d e f; do
+    pkt="in_port(1),eth(src=50:54:00:00:00:$d,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)"
+    AT_CHECK([ovs-appctl netdev-dummy/receive p1 $pkt])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/dp_hash(.*\/0x1)/dp_hash(0xXXXX\/0x1)/' | sed 's/packets.*actions:1/actions:1/' | strip_ufid | strip_used | sort], [0], [dnl
+flow-dump from non-dpdk interfaces:
+recirc_id(0),in_port(1),eth_type(0x0800),ipv4(src=192.168.0.1,frag=no), packets:15, bytes:900, used:0.0s, actions:hash(hash_l4(0)),recirc(0x2)
+recirc_id(0x2),dp_hash(0xXXXX/0x1),in_port(1),eth_type(0x0800),ipv4(frag=no), actions:11
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([ofproto-dpif - fast failover group])
 OVS_VSWITCHD_START
 add_of_ports br0 1 10 11
diff --git a/tests/ofproto.at b/tests/ofproto.at
index c4260ab..a15f9c9 100644
--- a/tests/ofproto.at
+++ b/tests/ofproto.at
@@ -402,6 +402,7 @@ AT_DATA([groups.txt], [dnl
 group_id=1233,type=select,selection_method=hash,bucket=output:10,bucket=output:11
 group_id=1234,type=select,selection_method=hash,fields(eth_dst,ip_dst,tcp_dst),bucket=output:10,bucket=output:11
 group_id=1235,type=all,bucket=actions=output:12,bucket=bucket_id:0,actions=output:10,bucket=bucket_id:1,actions=output:11
+group_id=1236,type=select,selection_method=dp_hash,bucket=output:10,bucket=output:11
 ])
 AT_CHECK([ovs-ofctl -O OpenFlow15 -vwarn add-groups br0 groups.txt])
 AT_CHECK([ovs-ofctl -F OXM-OpenFlow15 -O OpenFlow15 -vwarn dump-groups br0 1234], [0], [stdout])
@@ -414,6 +415,7 @@ AT_CHECK([ovs-ofctl -O OpenFlow15 -vwarn dump-groups br0], [0], [stdout])
 AT_CHECK([strip_xids < stdout], [0], [dnl
 OFPST_GROUP_DESC reply (OF1.5):
  group_id=1235,type=all,bucket=bucket_id:2,actions=output:12,bucket=bucket_id:0,actions=output:10,bucket=bucket_id:1,actions=output:11
+ group_id=1236,type=select,selection_method=dp_hash,bucket=bucket_id:0,actions=output:10,bucket=bucket_id:1,actions=output:11
  group_id=1233,type=select,selection_method=hash,bucket=bucket_id:0,actions=output:10,bucket=bucket_id:1,actions=output:11
 ])
 AT_CHECK([ovs-ofctl -O OpenFlow15 -vwarn del-groups br0 group_id=1234])
@@ -421,6 +423,7 @@ AT_CHECK([ovs-ofctl -O OpenFlow15 -vwarn dump-groups br0], [0], [stdout])
 AT_CHECK([strip_xids < stdout], [0], [dnl
 OFPST_GROUP_DESC reply (OF1.5):
  group_id=1235,type=all,bucket=bucket_id:2,actions=output:12,bucket=bucket_id:0,actions=output:10,bucket=bucket_id:1,actions=output:11
+ group_id=1236,type=select,selection_method=dp_hash,bucket=bucket_id:0,actions=output:10,bucket=bucket_id:1,actions=output:11
  group_id=1233,type=select,selection_method=hash,bucket=bucket_id:0,actions=output:10,bucket=bucket_id:1,actions=output:11
 ])
 AT_CHECK([ovs-ofctl -O OpenFlow15 -vwarn del-groups br0], [0])
-- 
2.1.4




More information about the dev mailing list