[ovs-dev] [PATCH v3 1/6] tc: allow offloading of block ids

John Hurley john.hurley at netronome.com
Thu Jun 28 16:03:02 UTC 2018


Blocks, in tc classifiers, allow the grouping of multiple qdiscs with an
associated block id. Whenever a filter is added to/removed from this
block, the filter is added to/removed from all associated qdiscs.

Extend TC offload functions to take a block id as a parameter. If the id
is zero then the qdisc is not considered part of a block.

Signed-off-by: John Hurley <john.hurley at netronome.com>
Reviewed-by: Simon Horman <simon.horman at netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe at netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski at netronome.com>
---
 lib/netdev-linux.c       |  4 +--
 lib/netdev-tc-offloads.c | 64 +++++++++++++++++++++++++++++++++++-------------
 lib/tc.c                 | 60 ++++++++++++++++++++++++++++++++-------------
 lib/tc.h                 | 12 ++++-----
 4 files changed, 98 insertions(+), 42 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 054a9b6..d89a0fb 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2275,7 +2275,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
 
     COVERAGE_INC(netdev_set_policing);
     /* Remove any existing ingress qdisc. */
-    error = tc_add_del_ingress_qdisc(ifindex, false);
+    error = tc_add_del_ingress_qdisc(ifindex, false, 0);
     if (error) {
         VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
                      netdev_name, ovs_strerror(error));
@@ -2283,7 +2283,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
     }
 
     if (kbits_rate) {
-        error = tc_add_del_ingress_qdisc(ifindex, true);
+        error = tc_add_del_ingress_qdisc(ifindex, true, 0);
         if (error) {
             VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
                          netdev_name, ovs_strerror(error));
diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 0d8cc2c..14558ad 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -44,6 +44,7 @@ static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
 
 static struct hmap ufid_tc = HMAP_INITIALIZER(&ufid_tc);
 static bool multi_mask_per_prio = false;
+static bool block_support = false;
 
 struct netlink_field {
     int offset;
@@ -307,6 +308,7 @@ int
 netdev_tc_flow_flush(struct netdev *netdev)
 {
     int ifindex = netdev_get_ifindex(netdev);
+    uint32_t block_id = 0;
 
     if (ifindex < 0) {
         VLOG_ERR_RL(&error_rl, "flow_flush: failed to get ifindex for %s: %s",
@@ -314,7 +316,7 @@ netdev_tc_flow_flush(struct netdev *netdev)
         return -ifindex;
     }
 
-    return tc_flush(ifindex);
+    return tc_flush(ifindex, block_id);
 }
 
 int
@@ -322,6 +324,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev,
                            struct netdev_flow_dump **dump_out)
 {
     struct netdev_flow_dump *dump;
+    uint32_t block_id = 0;
     int ifindex;
 
     ifindex = netdev_get_ifindex(netdev);
@@ -334,7 +337,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev,
     dump = xzalloc(sizeof *dump);
     dump->nl_dump = xzalloc(sizeof *dump->nl_dump);
     dump->netdev = netdev_ref(netdev);
-    tc_dump_flower_start(ifindex, dump->nl_dump);
+    tc_dump_flower_start(ifindex, dump->nl_dump, block_id);
 
     *dump_out = dump;
 
@@ -890,6 +893,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     struct flow *mask = &match->wc.masks;
     const struct flow_tnl *tnl = &match->flow.tunnel;
     struct tc_action *action;
+    uint32_t block_id = 0;
     struct nlattr *nla;
     size_t left;
     int prio = 0;
@@ -1097,7 +1101,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     handle = get_ufid_tc_mapping(ufid, &prio, NULL);
     if (handle && prio) {
         VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", handle, prio);
-        tc_del_filter(ifindex, prio, handle);
+        tc_del_filter(ifindex, prio, handle, block_id);
     }
 
     if (!prio) {
@@ -1111,7 +1115,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     flower.act_cookie.data = ufid;
     flower.act_cookie.len = sizeof *ufid;
 
-    err = tc_replace_flower(ifindex, prio, handle, &flower);
+    err = tc_replace_flower(ifindex, prio, handle, &flower, block_id);
     if (!err) {
         add_ufid_tc_mapping(ufid, flower.prio, flower.handle, netdev, ifindex);
     }
@@ -1131,6 +1135,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED,
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
     struct netdev *dev;
     struct tc_flower flower;
+    uint32_t block_id = 0;
     odp_port_t in_port;
     int prio = 0;
     int ifindex;
@@ -1152,7 +1157,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED,
 
     VLOG_DBG_RL(&rl, "flow get (dev %s prio %d handle %d)",
                 netdev_get_name(dev), prio, handle);
-    err = tc_get_flower(ifindex, prio, handle, &flower);
+    err = tc_get_flower(ifindex, prio, handle, &flower, block_id);
     netdev_close(dev);
     if (err) {
         VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s",
@@ -1175,6 +1180,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
                    struct dpif_flow_stats *stats)
 {
     struct tc_flower flower;
+    uint32_t block_id = 0;
     struct netdev *dev;
     int prio = 0;
     int ifindex;
@@ -1196,14 +1202,14 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
 
     if (stats) {
         memset(stats, 0, sizeof *stats);
-        if (!tc_get_flower(ifindex, prio, handle, &flower)) {
+        if (!tc_get_flower(ifindex, prio, handle, &flower, block_id)) {
             stats->n_packets = get_32aligned_u64(&flower.stats.n_packets);
             stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes);
             stats->used = flower.lastused;
         }
     }
 
-    error = tc_del_filter(ifindex, prio, handle);
+    error = tc_del_filter(ifindex, prio, handle, block_id);
     del_ufid_tc_mapping(ufid);
 
     netdev_close(dev);
@@ -1212,7 +1218,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
 }
 
 static void
-probe_multi_mask_per_prio(int ifindex)
+probe_multi_mask_per_prio(int ifindex, uint32_t block_id)
 {
     struct tc_flower flower;
     int error;
@@ -1224,7 +1230,7 @@ probe_multi_mask_per_prio(int ifindex)
     memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac);
     memset(&flower.mask.dst_mac, 0xff, sizeof flower.mask.dst_mac);
 
-    error = tc_replace_flower(ifindex, 1, 1, &flower);
+    error = tc_replace_flower(ifindex, 1, 1, &flower, block_id);
     if (error) {
         return;
     }
@@ -1232,23 +1238,42 @@ probe_multi_mask_per_prio(int ifindex)
     memset(&flower.key.src_mac, 0x11, sizeof flower.key.src_mac);
     memset(&flower.mask.src_mac, 0xff, sizeof flower.mask.src_mac);
 
-    error = tc_replace_flower(ifindex, 1, 2, &flower);
-    tc_del_filter(ifindex, 1, 1);
+    error = tc_replace_flower(ifindex, 1, 2, &flower, block_id);
+    tc_del_filter(ifindex, 1, 1, block_id);
 
     if (error) {
         return;
     }
 
-    tc_del_filter(ifindex, 1, 2);
+    tc_del_filter(ifindex, 1, 2, block_id);
 
     multi_mask_per_prio = true;
     VLOG_INFO("probe tc: multiple masks on single tc prio is supported.");
 }
 
+static void
+probe_tc_block_support(int ifindex)
+{
+    uint32_t block_id = 1;
+    int error;
+
+    error = tc_add_del_ingress_qdisc(ifindex, true, block_id);
+    if (error) {
+        return;
+    }
+
+    tc_add_del_ingress_qdisc(ifindex, false, block_id);
+
+    block_support = true;
+    VLOG_INFO("probe tc: block offload is supported.");
+}
+
 int
 netdev_tc_init_flow_api(struct netdev *netdev)
 {
-    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+    static struct ovsthread_once multi_mask_once = OVSTHREAD_ONCE_INITIALIZER;
+    static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER;
+    uint32_t block_id = 0;
     int ifindex;
     int error;
 
@@ -1259,7 +1284,12 @@ netdev_tc_init_flow_api(struct netdev *netdev)
         return -ifindex;
     }
 
-    error = tc_add_del_ingress_qdisc(ifindex, true);
+    if (ovsthread_once_start(&block_once)) {
+        probe_tc_block_support(ifindex);
+        ovsthread_once_done(&block_once);
+    }
+
+    error = tc_add_del_ingress_qdisc(ifindex, true, block_id);
 
     if (error && error != EEXIST) {
         VLOG_ERR("failed adding ingress qdisc required for offloading: %s",
@@ -1269,9 +1299,9 @@ netdev_tc_init_flow_api(struct netdev *netdev)
 
     VLOG_INFO("added ingress qdisc to %s", netdev_get_name(netdev));
 
-    if (ovsthread_once_start(&once)) {
-        probe_multi_mask_per_prio(ifindex);
-        ovsthread_once_done(&once);
+    if (ovsthread_once_start(&multi_mask_once)) {
+        probe_multi_mask_per_prio(ifindex, block_id);
+        ovsthread_once_done(&multi_mask_once);
     }
 
     return 0;
diff --git a/lib/tc.c b/lib/tc.c
index 7133486..7a71036 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -43,6 +43,14 @@
 
 #define MAX_PEDIT_OFFSETS 32
 
+#ifndef TCM_IFINDEX_MAGIC_BLOCK
+#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
+#endif
+
+#if TCA_MAX < 14
+#define TCA_INGRESS_BLOCK 13
+#endif
+
 VLOG_DEFINE_THIS_MODULE(tc);
 
 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
@@ -173,10 +181,14 @@ tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
  * The configuration and stats may be seen with the following command:
  *     /sbin/tc -s qdisc show dev <devname>
  *
+ * If block_id is greater than 0, then the ingress qdisc is added to a block.
+ * In this case, it is equivalent to running (when 'add' is true):
+ *     /sbin/tc qdisc add dev <devname> ingress_block <block_id> ingress
+ *
  * Returns 0 if successful, otherwise a positive errno value.
  */
 int
-tc_add_del_ingress_qdisc(int ifindex, bool add)
+tc_add_del_ingress_qdisc(int ifindex, bool add, uint32_t block_id)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -189,6 +201,9 @@ tc_add_del_ingress_qdisc(int ifindex, bool add)
     tcmsg->tcm_parent = TC_H_INGRESS;
     nl_msg_put_string(&request, TCA_KIND, "ingress");
     nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
+    if (block_id) {
+        nl_msg_put_u32(&request, TCA_INGRESS_BLOCK, block_id);
+    }
 
     error = tc_transact(&request, NULL);
     if (error) {
@@ -1007,13 +1022,15 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tc_flower *flower)
 }
 
 int
-tc_dump_flower_start(int ifindex, struct nl_dump *dump)
+tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
+    int index;
 
-    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP, &request);
-    tcmsg->tcm_parent = TC_INGRESS_PARENT;
+    index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
+    tcmsg = tc_make_request(index, RTM_GETTFILTER, NLM_F_DUMP, &request);
+    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
     tcmsg->tcm_info = TC_H_UNSPEC;
     tcmsg->tcm_handle = 0;
 
@@ -1024,28 +1041,32 @@ tc_dump_flower_start(int ifindex, struct nl_dump *dump)
 }
 
 int
-tc_flush(int ifindex)
+tc_flush(int ifindex, uint32_t block_id)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
+    int index;
 
-    tcmsg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ACK, &request);
-    tcmsg->tcm_parent = TC_INGRESS_PARENT;
+    index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
+    tcmsg = tc_make_request(index, RTM_DELTFILTER, NLM_F_ACK, &request);
+    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
     tcmsg->tcm_info = TC_H_UNSPEC;
 
     return tc_transact(&request, NULL);
 }
 
 int
-tc_del_filter(int ifindex, int prio, int handle)
+tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
     struct ofpbuf *reply;
     int error;
+    int index;
 
-    tcmsg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ECHO, &request);
-    tcmsg->tcm_parent = TC_INGRESS_PARENT;
+    index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
+    tcmsg = tc_make_request(index, RTM_DELTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
     tcmsg->tcm_info = tc_make_handle(prio, 0);
     tcmsg->tcm_handle = handle;
 
@@ -1057,15 +1078,18 @@ tc_del_filter(int ifindex, int prio, int handle)
 }
 
 int
-tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower)
+tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower,
+              uint32_t block_id)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
     struct ofpbuf *reply;
     int error;
+    int index;
 
-    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_ECHO, &request);
-    tcmsg->tcm_parent = TC_INGRESS_PARENT;
+    index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
+    tcmsg = tc_make_request(index, RTM_GETTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
     tcmsg->tcm_info = tc_make_handle(prio, 0);
     tcmsg->tcm_handle = handle;
 
@@ -1625,7 +1649,7 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
 
 int
 tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
-                  struct tc_flower *flower)
+                  struct tc_flower *flower, uint32_t block_id)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -1633,10 +1657,12 @@ tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
     int error = 0;
     size_t basic_offset;
     uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
+    int index;
 
-    tcmsg = tc_make_request(ifindex, RTM_NEWTFILTER,
-                            NLM_F_CREATE | NLM_F_ECHO, &request);
-    tcmsg->tcm_parent = TC_INGRESS_PARENT;
+    index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
+    tcmsg = tc_make_request(index, RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_ECHO,
+                            &request);
+    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
     tcmsg->tcm_info = tc_make_handle(prio, eth_type);
     tcmsg->tcm_handle = handle;
 
diff --git a/lib/tc.h b/lib/tc.h
index cc19374..80505f0 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -63,7 +63,7 @@ tc_get_minor(unsigned int handle)
 struct tcmsg *tc_make_request(int ifindex, int type,
                               unsigned int flags, struct ofpbuf *);
 int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
-int tc_add_del_ingress_qdisc(int ifindex, bool add);
+int tc_add_del_ingress_qdisc(int ifindex, bool add, uint32_t block_id);
 
 struct tc_cookie {
     const void *data;
@@ -198,12 +198,12 @@ BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite)
                   + sizeof(uint32_t) - 2 < sizeof(struct tc_flower));
 
 int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
-                      struct tc_flower *flower);
-int tc_del_filter(int ifindex, int prio, int handle);
+                      struct tc_flower *flower, uint32_t block_id);
+int tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id);
 int tc_get_flower(int ifindex, int prio, int handle,
-                  struct tc_flower *flower);
-int tc_flush(int ifindex);
-int tc_dump_flower_start(int ifindex, struct nl_dump *dump);
+                  struct tc_flower *flower, uint32_t block_id);
+int tc_flush(int ifindex, uint32_t block_id);
+int tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id);
 int parse_netlink_to_tc_flower(struct ofpbuf *reply,
                                struct tc_flower *flower);
 void tc_set_policy(const char *policy);
-- 
2.7.4



More information about the dev mailing list