[ovs-dev] [PATCH OVS v3 3/4] ovs-tc: allow offloading TC rules to egress qdiscs

John Hurley john.hurley at netronome.com
Tue Apr 9 14:36:13 UTC 2019


Offloading rules to a TC datapath only allows the creation of ingress hook
qdiscs and the application of filters to them. However, there are
situations where an egress qdisc is more applicable (e.g. when offloading
to TC the rules applied to OvS internal ports).

Extend the TC API in OvS to allow the creation of egress qdiscs and to add
or interact with flower filters applied to these.

Signed-off-by: John Hurley <john.hurley at netronome.com>
Reviewed-by: Simon Horman <simon.horman at netronome.com>
Reviewed-by: Roi Dayan <roid at mellanox.com>
---
 lib/netdev-linux.c       | 17 +++++++-------
 lib/netdev-tc-offloads.c | 36 ++++++++++++++++--------------
 lib/tc.c                 | 58 ++++++++++++++++++++++++++++++++++--------------
 lib/tc.h                 | 26 +++++++++++++++++-----
 4 files changed, 89 insertions(+), 48 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index deedc69..87337e0 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -747,10 +747,10 @@ netdev_linux_update_lag(struct rtnetlink_change *change)
                 lag->node = shash_add(&lag_shash, change->ifname, lag);
 
                 /* delete ingress block in case it exists */
-                tc_add_del_ingress_qdisc(change->if_index, false, 0);
+                tc_add_del_qdisc(change->if_index, false, 0, TC_INGRESS);
                 /* LAG master is linux netdev so add slave to same block. */
-                error = tc_add_del_ingress_qdisc(change->if_index, true,
-                                                 block_id);
+                error = tc_add_del_qdisc(change->if_index, true, block_id,
+                                         TC_INGRESS);
                 if (error) {
                     VLOG_WARN("failed to bind LAG slave %s to master's block",
                               change->ifname);
@@ -766,8 +766,8 @@ netdev_linux_update_lag(struct rtnetlink_change *change)
         lag = shash_find_data(&lag_shash, change->ifname);
 
         if (lag) {
-            tc_add_del_ingress_qdisc(change->if_index, false,
-                                     lag->block_id);
+            tc_add_del_qdisc(change->if_index, false, lag->block_id,
+                             TC_INGRESS);
             shash_delete(&lag_shash, lag->node);
             free(lag);
         }
@@ -2430,7 +2430,8 @@ tc_del_matchall_policer(struct netdev *netdev)
         return err;
     }
 
-    err = tc_del_filter(ifindex, TC_RESERVED_PRIORITY_POLICE, 1, block_id);
+    err = tc_del_filter(ifindex, TC_RESERVED_PRIORITY_POLICE, 1, block_id,
+                        TC_INGRESS);
     if (err) {
         return err;
     }
@@ -2486,7 +2487,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
 
     COVERAGE_INC(netdev_set_policing);
     /* Remove any existing ingress qdisc. */
-    error = tc_add_del_ingress_qdisc(ifindex, false, 0);
+    error = tc_add_del_qdisc(ifindex, false, 0, TC_INGRESS);
     if (error) {
         VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
                      netdev_name, ovs_strerror(error));
@@ -2494,7 +2495,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
     }
 
     if (kbits_rate) {
-        error = tc_add_del_ingress_qdisc(ifindex, true, 0);
+        error = tc_add_del_qdisc(ifindex, true, 0, TC_INGRESS);
         if (error) {
             VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
                          netdev_name, ovs_strerror(error));
diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index c18992d..634c9b9 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -189,7 +189,7 @@ del_filter_and_ufid_mapping(int ifindex, int prio, int handle,
 {
     int err;
 
-    err = tc_del_filter(ifindex, prio, handle, block_id);
+    err = tc_del_filter(ifindex, prio, handle, block_id, TC_INGRESS);
     del_ufid_tc_mapping(ufid);
 
     return err;
@@ -357,7 +357,7 @@ netdev_tc_flow_flush(struct netdev *netdev)
 
     block_id = get_block_id_from_netdev(netdev);
 
-    return tc_flush(ifindex, block_id);
+    return tc_flush(ifindex, block_id, TC_INGRESS);
 }
 
 int
@@ -379,7 +379,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev,
     dump = xzalloc(sizeof *dump);
     dump->nl_dump = xzalloc(sizeof *dump->nl_dump);
     dump->netdev = netdev_ref(netdev);
-    tc_dump_flower_start(ifindex, dump->nl_dump, block_id);
+    tc_dump_flower_start(ifindex, dump->nl_dump, block_id, TC_INGRESS);
 
     *dump_out = dump;
 
@@ -1356,7 +1356,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     flower.act_cookie.data = ufid;
     flower.act_cookie.len = sizeof *ufid;
 
-    err = tc_replace_flower(ifindex, prio, handle, &flower, block_id);
+    err = tc_replace_flower(ifindex, prio, handle, &flower, block_id,
+                            TC_INGRESS);
     if (!err) {
         add_ufid_tc_mapping(ufid, flower.prio, flower.handle, netdev, ifindex);
     }
@@ -1399,7 +1400,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED,
     block_id = get_block_id_from_netdev(dev);
     VLOG_DBG_RL(&rl, "flow get (dev %s prio %d handle %d block_id %d)",
                 netdev_get_name(dev), prio, handle, block_id);
-    err = tc_get_flower(ifindex, prio, handle, &flower, block_id);
+    err = tc_get_flower(ifindex, prio, handle, &flower, block_id, TC_INGRESS);
     netdev_close(dev);
     if (err) {
         VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s",
@@ -1446,7 +1447,8 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
 
     if (stats) {
         memset(stats, 0, sizeof *stats);
-        if (!tc_get_flower(ifindex, prio, handle, &flower, block_id)) {
+        if (!tc_get_flower(ifindex, prio, handle, &flower, block_id,
+                           TC_INGRESS)) {
             stats->n_packets = get_32aligned_u64(&flower.stats.n_packets);
             stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes);
             stats->used = flower.lastused;
@@ -1467,7 +1469,7 @@ probe_multi_mask_per_prio(int ifindex)
     int block_id = 0;
     int error;
 
-    error = tc_add_del_ingress_qdisc(ifindex, true, block_id);
+    error = tc_add_del_qdisc(ifindex, true, block_id, TC_INGRESS);
     if (error) {
         return;
     }
@@ -1479,7 +1481,7 @@ probe_multi_mask_per_prio(int ifindex)
     memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac);
     memset(&flower.mask.dst_mac, 0xff, sizeof flower.mask.dst_mac);
 
-    error = tc_replace_flower(ifindex, 1, 1, &flower, block_id);
+    error = tc_replace_flower(ifindex, 1, 1, &flower, block_id, TC_INGRESS);
     if (error) {
         goto out;
     }
@@ -1487,20 +1489,20 @@ probe_multi_mask_per_prio(int ifindex)
     memset(&flower.key.src_mac, 0x11, sizeof flower.key.src_mac);
     memset(&flower.mask.src_mac, 0xff, sizeof flower.mask.src_mac);
 
-    error = tc_replace_flower(ifindex, 1, 2, &flower, block_id);
-    tc_del_filter(ifindex, 1, 1, block_id);
+    error = tc_replace_flower(ifindex, 1, 2, &flower, block_id, TC_INGRESS);
+    tc_del_filter(ifindex, 1, 1, block_id, TC_INGRESS);
 
     if (error) {
         goto out;
     }
 
-    tc_del_filter(ifindex, 1, 2, block_id);
+    tc_del_filter(ifindex, 1, 2, block_id, TC_INGRESS);
 
     multi_mask_per_prio = true;
     VLOG_INFO("probe tc: multiple masks on single tc prio is supported.");
 
 out:
-    tc_add_del_ingress_qdisc(ifindex, false, block_id);
+    tc_add_del_qdisc(ifindex, false, block_id, TC_INGRESS);
 }
 
 static void
@@ -1510,7 +1512,7 @@ probe_tc_block_support(int ifindex)
     uint32_t block_id = 1;
     int error;
 
-    error = tc_add_del_ingress_qdisc(ifindex, true, block_id);
+    error = tc_add_del_qdisc(ifindex, true, block_id, TC_INGRESS);
     if (error) {
         return;
     }
@@ -1522,9 +1524,9 @@ probe_tc_block_support(int ifindex)
     memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac);
     memset(&flower.mask.dst_mac, 0xff, sizeof flower.mask.dst_mac);
 
-    error = tc_replace_flower(ifindex, 1, 1, &flower, block_id);
+    error = tc_replace_flower(ifindex, 1, 1, &flower, block_id, TC_INGRESS);
 
-    tc_add_del_ingress_qdisc(ifindex, false, block_id);
+    tc_add_del_qdisc(ifindex, false, block_id, TC_INGRESS);
 
     if (!error) {
         block_support = true;
@@ -1549,7 +1551,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
     }
 
     /* make sure there is no ingress qdisc */
-    tc_add_del_ingress_qdisc(ifindex, false, 0);
+    tc_add_del_qdisc(ifindex, false, 0, TC_INGRESS);
 
     if (ovsthread_once_start(&block_once)) {
         probe_tc_block_support(ifindex);
@@ -1562,7 +1564,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
     }
 
     block_id = get_block_id_from_netdev(netdev);
-    error = tc_add_del_ingress_qdisc(ifindex, true, block_id);
+    error = tc_add_del_qdisc(ifindex, true, block_id, TC_INGRESS);
 
     if (error && error != EEXIST) {
         VLOG_ERR("failed adding ingress qdisc required for offloading: %s",
diff --git a/lib/tc.c b/lib/tc.c
index 336cdeb..1eca356 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -201,14 +201,21 @@ tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
     return error;
 }
 
-/* Adds or deletes a root ingress qdisc on device with specified ifindex.
+/* Adds or deletes a root qdisc on device with specified ifindex.
  *
- * This function is equivalent to running the following when 'add' is true:
+ * The 'hook' parameter (an enum tc_qdisc_hook) determines whether the qdisc
+ * is added on device ingress or egress.
+ *
+ * If 'hook' is TC_INGRESS, this function is equivalent to running the
+ * following when 'add' is true:
  *     /sbin/tc qdisc add dev <devname> handle ffff: ingress
  *
  * This function is equivalent to running the following when 'add' is false:
  *     /sbin/tc qdisc del dev <devname> handle ffff: ingress
  *
+ * If 'hook' is TC_EGRESS, this function is equivalent to running:
+ *     /sbin/tc qdisc (add|del) dev <devname> handle ffff: clsact
+ *
  * Where dev <devname> is the device with specified ifindex name.
  *
  * The configuration and stats may be seen with the following command:
@@ -221,7 +228,8 @@ tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
  * Returns 0 if successful, otherwise a positive errno value.
  */
 int
-tc_add_del_ingress_qdisc(int ifindex, bool add, uint32_t block_id)
+tc_add_del_qdisc(int ifindex, bool add, uint32_t block_id,
+                 enum tc_qdisc_hook hook)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -230,11 +238,19 @@ tc_add_del_ingress_qdisc(int ifindex, bool add, uint32_t block_id)
     int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0;
 
     tcmsg = tc_make_request(ifindex, type, flags, &request);
-    tcmsg->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-    tcmsg->tcm_parent = TC_H_INGRESS;
-    nl_msg_put_string(&request, TCA_KIND, "ingress");
+
+    if (hook == TC_EGRESS) {
+        tcmsg->tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
+        tcmsg->tcm_parent = TC_H_CLSACT;
+        nl_msg_put_string(&request, TCA_KIND, "clsact");
+    } else {
+        tcmsg->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+        tcmsg->tcm_parent = TC_H_INGRESS;
+        nl_msg_put_string(&request, TCA_KIND, "ingress");
+    }
+
     nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
-    if (block_id) {
+    if (hook == TC_INGRESS && block_id) {
         nl_msg_put_u32(&request, TCA_INGRESS_BLOCK, block_id);
     }
 
@@ -1455,7 +1471,8 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tc_flower *flower)
 }
 
 int
-tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id)
+tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id,
+                     enum tc_qdisc_hook hook)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -1463,7 +1480,8 @@ tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id)
 
     index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
     tcmsg = tc_make_request(index, RTM_GETTFILTER, NLM_F_DUMP, &request);
-    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
+    tcmsg->tcm_parent = (hook == TC_EGRESS) ?
+                        TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
     tcmsg->tcm_info = TC_H_UNSPEC;
     tcmsg->tcm_handle = 0;
 
@@ -1474,7 +1492,7 @@ tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id)
 }
 
 int
-tc_flush(int ifindex, uint32_t block_id)
+tc_flush(int ifindex, uint32_t block_id, enum tc_qdisc_hook hook)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -1482,14 +1500,16 @@ tc_flush(int ifindex, uint32_t block_id)
 
     index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
     tcmsg = tc_make_request(index, RTM_DELTFILTER, NLM_F_ACK, &request);
-    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
+    tcmsg->tcm_parent = (hook == TC_EGRESS) ?
+                        TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
     tcmsg->tcm_info = TC_H_UNSPEC;
 
     return tc_transact(&request, NULL);
 }
 
 int
-tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id)
+tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id,
+              enum tc_qdisc_hook hook)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -1499,7 +1519,8 @@ tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id)
 
     index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
     tcmsg = tc_make_request(index, RTM_DELTFILTER, NLM_F_ECHO, &request);
-    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
+    tcmsg->tcm_parent = (hook == TC_EGRESS) ?
+                        TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
     tcmsg->tcm_info = tc_make_handle(prio, 0);
     tcmsg->tcm_handle = handle;
 
@@ -1512,7 +1533,7 @@ tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id)
 
 int
 tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower,
-              uint32_t block_id)
+              uint32_t block_id, enum tc_qdisc_hook hook)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -1522,7 +1543,8 @@ tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower,
 
     index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
     tcmsg = tc_make_request(index, RTM_GETTFILTER, NLM_F_ECHO, &request);
-    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
+    tcmsg->tcm_parent = (hook == TC_EGRESS) ?
+                        TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
     tcmsg->tcm_info = tc_make_handle(prio, 0);
     tcmsg->tcm_handle = handle;
 
@@ -2279,7 +2301,8 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
 
 int
 tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
-                  struct tc_flower *flower, uint32_t block_id)
+                  struct tc_flower *flower, uint32_t block_id,
+                  enum tc_qdisc_hook hook)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -2292,7 +2315,8 @@ tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
     index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
     tcmsg = tc_make_request(index, RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_ECHO,
                             &request);
-    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
+    tcmsg->tcm_parent = (hook == TC_EGRESS) ?
+                        TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
     tcmsg->tcm_info = tc_make_handle(prio, eth_type);
     tcmsg->tcm_handle = handle;
 
diff --git a/lib/tc.h b/lib/tc.h
index 154e120..2e0f5e3 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -36,8 +36,12 @@
 #ifndef TC_H_MIN_INGRESS
 #define TC_H_MIN_INGRESS       0xFFF2U
 #endif
+#ifndef TC_H_MIN_EGRESS
+#define TC_H_MIN_EGRESS       0xFFF3U
+#endif
 
 #define TC_INGRESS_PARENT TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS)
+#define TC_EGRESS_PARENT TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS)
 
 #define TC_POLICY_DEFAULT "none"
 
@@ -48,6 +52,11 @@ enum tc_flower_reserved_prio {
 };
 #define TC_RESERVED_PRIORITY_MAX (__TC_RESERVED_PRIORITY_MAX -1)
 
+enum tc_qdisc_hook {
+    TC_INGRESS,
+    TC_EGRESS,
+};
+
 /* Returns tc handle 'major':'minor'. */
 static inline unsigned int
 tc_make_handle(unsigned int major, unsigned int minor)
@@ -72,7 +81,8 @@ tc_get_minor(unsigned int handle)
 struct tcmsg *tc_make_request(int ifindex, int type,
                               unsigned int flags, struct ofpbuf *);
 int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
-int tc_add_del_ingress_qdisc(int ifindex, bool add, uint32_t block_id);
+int tc_add_del_qdisc(int ifindex, bool add, uint32_t block_id,
+                     enum tc_qdisc_hook hook);
 
 struct tc_cookie {
     const void *data;
@@ -225,12 +235,16 @@ BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite)
                   + sizeof(uint32_t) - 2 < sizeof(struct tc_flower));
 
 int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
-                      struct tc_flower *flower, uint32_t block_id);
-int tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id);
+                      struct tc_flower *flower, uint32_t block_id,
+                      enum tc_qdisc_hook hook);
+int tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id,
+                  enum tc_qdisc_hook hook);
 int tc_get_flower(int ifindex, int prio, int handle,
-                  struct tc_flower *flower, uint32_t block_id);
-int tc_flush(int ifindex, uint32_t block_id);
-int tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id);
+                  struct tc_flower *flower, uint32_t block_id,
+                  enum tc_qdisc_hook hook);
+int tc_flush(int ifindex, uint32_t block_id, enum tc_qdisc_hook hook);
+int tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id,
+                         enum tc_qdisc_hook hook);
 int parse_netlink_to_tc_flower(struct ofpbuf *reply,
                                struct tc_flower *flower);
 void tc_set_policy(const char *policy);
-- 
2.7.4



More information about the dev mailing list