[ovs-dev] [PATCH ovs V4 01/24] tc: Add tc flower interface

Roi Dayan roid at mellanox.com
Mon Mar 13 13:36:51 UTC 2017


From: Paul Blakey <paulb at mellanox.com>

Add a tc flower interface that will be used to offload flows via the tc
flower classifier. Depending on the flag used (skip_sw/skip_hw), flower
will either pass the flows to hardware or handle them itself in software.
Also move some tc-related functions from netdev-linux.c to tc.c.

Co-authored-by: Shahar Klein <shahark at mellanox.com>
Signed-off-by: Shahar Klein <shahark at mellanox.com>
Signed-off-by: Paul Blakey <paulb at mellanox.com>
Reviewed-by: Roi Dayan <roid at mellanox.com>
---
 lib/automake.mk    |    2 +
 lib/netdev-linux.c |  164 ++-------
 lib/tc.c           | 1042 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/tc.h           |  119 ++++++
 4 files changed, 1203 insertions(+), 124 deletions(-)
 create mode 100644 lib/tc.c
 create mode 100644 lib/tc.h

diff --git a/lib/automake.mk b/lib/automake.mk
index abc9d0d..a6141ea 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -351,6 +351,8 @@ if LINUX
 lib_libopenvswitch_la_SOURCES += \
 	lib/dpif-netlink.c \
 	lib/dpif-netlink.h \
+	lib/tc.h \
+	lib/tc.c \
 	lib/if-notifier.c \
 	lib/if-notifier.h \
 	lib/netdev-linux.c \
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 9ff1333..085f530 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -29,8 +29,6 @@
 #include <linux/types.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
-#include <linux/pkt_cls.h>
-#include <linux/pkt_sched.h>
 #include <linux/rtnetlink.h>
 #include <linux/sockios.h>
 #include <sys/types.h>
@@ -74,6 +72,7 @@
 #include "unaligned.h"
 #include "openvswitch/vlog.h"
 #include "util.h"
+#include "tc.h"
 
 VLOG_DEFINE_THIS_MODULE(netdev_linux);
 
@@ -434,18 +433,14 @@ static const struct tc_ops *const tcs[] = {
     NULL
 };
 
-static unsigned int tc_make_handle(unsigned int major, unsigned int minor);
-static unsigned int tc_get_major(unsigned int handle);
-static unsigned int tc_get_minor(unsigned int handle);
-
 static unsigned int tc_ticks_to_bytes(unsigned int rate, unsigned int ticks);
 static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
 static unsigned int tc_buffer_per_jiffy(unsigned int rate);
+static struct tcmsg *netdev_linux_tc_make_request(const struct netdev *,
+                                                  int type,
+                                                  unsigned int flags,
+                                                  struct ofpbuf *);
 
-static struct tcmsg *tc_make_request(const struct netdev *, int type,
-                                     unsigned int flags, struct ofpbuf *);
-static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
-static int tc_add_del_ingress_qdisc(struct netdev *netdev, bool add);
 static int tc_add_policer(struct netdev *,
                           uint32_t kbits_rate, uint32_t kbits_burst);
 
@@ -2055,12 +2050,18 @@ netdev_linux_set_policing(struct netdev *netdev_,
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     const char *netdev_name = netdev_get_name(netdev_);
     int error;
+    int ifindex;
 
     kbits_burst = (!kbits_rate ? 0       /* Force to 0 if no rate specified. */
                    : !kbits_burst ? 8000 /* Default to 8000 kbits if 0. */
                    : kbits_burst);       /* Stick with user-specified value. */
 
     ovs_mutex_lock(&netdev->mutex);
+    error = get_ifindex(netdev_, &ifindex);
+    if (error) {
+        goto out;
+    }
+
     if (netdev->cache_valid & VALID_POLICING) {
         error = netdev->netdev_policing_error;
         if (error || (netdev->kbits_rate == kbits_rate &&
@@ -2073,7 +2074,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
 
     COVERAGE_INC(netdev_set_policing);
     /* Remove any existing ingress qdisc. */
-    error = tc_add_del_ingress_qdisc(netdev_, false);
+    error = tc_add_del_ingress_qdisc(ifindex, false);
     if (error) {
         VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
                      netdev_name, ovs_strerror(error));
@@ -2081,7 +2082,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
     }
 
     if (kbits_rate) {
-        error = tc_add_del_ingress_qdisc(netdev_, true);
+        error = tc_add_del_ingress_qdisc(ifindex, true);
         if (error) {
             VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
                          netdev_name, ovs_strerror(error));
@@ -2350,7 +2351,7 @@ start_queue_dump(const struct netdev *netdev, struct queue_dump_state *state)
     struct ofpbuf request;
     struct tcmsg *tcmsg;
 
-    tcmsg = tc_make_request(netdev, RTM_GETTCLASS, 0, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_GETTCLASS, 0, &request);
     if (!tcmsg) {
         return false;
     }
@@ -2909,8 +2910,8 @@ codel_setup_qdisc__(struct netdev *netdev, uint32_t target, uint32_t limit,
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -3127,8 +3128,8 @@ fqcodel_setup_qdisc__(struct netdev *netdev, uint32_t target, uint32_t limit,
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -3351,8 +3352,8 @@ sfq_setup_qdisc__(struct netdev *netdev, uint32_t quantum, uint32_t perturb)
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -3538,8 +3539,8 @@ htb_setup_qdisc__(struct netdev *netdev)
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -3592,7 +3593,8 @@ htb_setup_class__(struct netdev *netdev, unsigned int handle,
     opt.cbuffer = tc_calc_buffer(opt.ceil.rate, mtu, class->burst);
     opt.prio = class->priority;
 
-    tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTCLASS,
+                                         NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -4201,13 +4203,11 @@ hfsc_setup_qdisc__(struct netdev * netdev)
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
-
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
-
     tcmsg->tcm_handle = tc_make_handle(1, 0);
     tcmsg->tcm_parent = TC_H_ROOT;
 
@@ -4234,12 +4234,11 @@ hfsc_setup_class__(struct netdev *netdev, unsigned int handle,
     struct ofpbuf request;
     struct tc_service_curve min, max;
 
-    tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);
-
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTCLASS,
+                                         NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
-
     tcmsg->tcm_handle = handle;
     tcmsg->tcm_parent = parent;
 
@@ -4610,102 +4609,17 @@ static double ticks_per_s;
  */
 static unsigned int buffer_hz;
 
-/* Returns tc handle 'major':'minor'. */
-static unsigned int
-tc_make_handle(unsigned int major, unsigned int minor)
-{
-    return TC_H_MAKE(major << 16, minor);
-}
-
-/* Returns the major number from 'handle'. */
-static unsigned int
-tc_get_major(unsigned int handle)
-{
-    return TC_H_MAJ(handle) >> 16;
-}
-
-/* Returns the minor number from 'handle'. */
-static unsigned int
-tc_get_minor(unsigned int handle)
-{
-    return TC_H_MIN(handle);
-}
-
 static struct tcmsg *
-tc_make_request(const struct netdev *netdev, int type, unsigned int flags,
-                struct ofpbuf *request)
+netdev_linux_tc_make_request(const struct netdev *netdev, int type,
+                             unsigned int flags, struct ofpbuf *request)
 {
-    struct tcmsg *tcmsg;
     int ifindex;
-    int error;
 
-    error = get_ifindex(netdev, &ifindex);
-    if (error) {
+    if (get_ifindex(netdev, &ifindex)) {
         return NULL;
     }
 
-    ofpbuf_init(request, 512);
-    nl_msg_put_nlmsghdr(request, sizeof *tcmsg, type, NLM_F_REQUEST | flags);
-    tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg);
-    tcmsg->tcm_family = AF_UNSPEC;
-    tcmsg->tcm_ifindex = ifindex;
-    /* Caller should fill in tcmsg->tcm_handle. */
-    /* Caller should fill in tcmsg->tcm_parent. */
-
-    return tcmsg;
-}
-
-static int
-tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
-{
-    int error = nl_transact(NETLINK_ROUTE, request, replyp);
-    ofpbuf_uninit(request);
-    return error;
-}
-
-/* Adds or deletes a root ingress qdisc on 'netdev'.  We use this for
- * policing configuration.
- *
- * This function is equivalent to running the following when 'add' is true:
- *     /sbin/tc qdisc add dev <devname> handle ffff: ingress
- *
- * This function is equivalent to running the following when 'add' is false:
- *     /sbin/tc qdisc del dev <devname> handle ffff: ingress
- *
- * The configuration and stats may be seen with the following command:
- *     /sbin/tc -s qdisc show dev <devname>
- *
- * Returns 0 if successful, otherwise a positive errno value.
- */
-static int
-tc_add_del_ingress_qdisc(struct netdev *netdev, bool add)
-{
-    struct ofpbuf request;
-    struct tcmsg *tcmsg;
-    int error;
-    int type = add ? RTM_NEWQDISC : RTM_DELQDISC;
-    int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0;
-
-    tcmsg = tc_make_request(netdev, type, flags, &request);
-    if (!tcmsg) {
-        return ENODEV;
-    }
-    tcmsg->tcm_handle = tc_make_handle(0xffff, 0);
-    tcmsg->tcm_parent = TC_H_INGRESS;
-    nl_msg_put_string(&request, TCA_KIND, "ingress");
-    nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
-
-    error = tc_transact(&request, NULL);
-    if (error) {
-        /* If we're deleting the qdisc, don't worry about some of the
-         * error conditions. */
-        if (!add && (error == ENOENT || error == EINVAL)) {
-            return 0;
-        }
-        return error;
-    }
-
-    return 0;
+    return tc_make_request(ifindex, type, flags, request);
 }
 
 /* Adds a policer to 'netdev' with a rate of 'kbits_rate' and a burst size
@@ -4748,8 +4662,8 @@ tc_add_policer(struct netdev *netdev,
     tc_police.burst = tc_bytes_to_ticks(
         tc_police.rate.rate, MIN(UINT32_MAX / 1024, kbits_burst) * 1024 / 8);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWTFILTER,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTFILTER,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -5014,7 +4928,8 @@ tc_query_class(const struct netdev *netdev,
     struct tcmsg *tcmsg;
     int error;
 
-    tcmsg = tc_make_request(netdev, RTM_GETTCLASS, NLM_F_ECHO, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_GETTCLASS,
+                                         NLM_F_ECHO, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -5040,7 +4955,7 @@ tc_delete_class(const struct netdev *netdev, unsigned int handle)
     struct tcmsg *tcmsg;
     int error;
 
-    tcmsg = tc_make_request(netdev, RTM_DELTCLASS, 0, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_DELTCLASS, 0, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -5066,7 +4981,7 @@ tc_del_qdisc(struct netdev *netdev_)
     struct tcmsg *tcmsg;
     int error;
 
-    tcmsg = tc_make_request(netdev_, RTM_DELQDISC, 0, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev_, RTM_DELQDISC, 0, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -5147,7 +5062,8 @@ tc_query_qdisc(const struct netdev *netdev_)
      * in such a case we get no response at all from the kernel (!) if a
      * builtin qdisc is in use (which is later caught by "!error &&
      * !qdisc->size"). */
-    tcmsg = tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC,
+                                         NLM_F_ECHO, &request);
     if (!tcmsg) {
         return ENODEV;
     }
diff --git a/lib/tc.c b/lib/tc.c
new file mode 100644
index 0000000..0c9c15e
--- /dev/null
+++ b/lib/tc.c
@@ -0,0 +1,1042 @@
+/*
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <linux/rtnetlink.h>
+#include <net/if.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <linux/tc_act/tc_vlan.h>
+#include <linux/tc_act/tc_tunnel_key.h>
+#include <linux/gen_stats.h>
+#include "timeval.h"
+#include "netlink-socket.h"
+#include "netlink.h"
+#include "rtnetlink.h"
+#include "openvswitch/vlog.h"
+#include "openvswitch/ofpbuf.h"
+#include "tc.h"
+#include "util.h"
+#include "byte-order.h"
+
+VLOG_DEFINE_THIS_MODULE(tc);
+
+static struct vlog_rate_limit parse_err = VLOG_RATE_LIMIT_INIT(5, 5);
+
+/* Returns tc handle 'major':'minor'. */
+unsigned int
+tc_make_handle(unsigned int major, unsigned int minor)
+{
+    return TC_H_MAKE(major << 16, minor);
+}
+
+/* Returns the major number from 'handle'. */
+unsigned int
+tc_get_major(unsigned int handle)
+{
+    return TC_H_MAJ(handle) >> 16;
+}
+
+/* Returns the minor number from 'handle'. */
+unsigned int
+tc_get_minor(unsigned int handle)
+{
+    return TC_H_MIN(handle);
+}
+
+struct tcmsg *
+tc_make_request(int ifindex, int type, unsigned int flags,
+                struct ofpbuf *request)
+{
+    struct tcmsg *tcmsg;
+
+    ofpbuf_init(request, 512);
+    nl_msg_put_nlmsghdr(request, sizeof *tcmsg, type, NLM_F_REQUEST | flags);
+    tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg);
+    tcmsg->tcm_family = AF_UNSPEC;
+    tcmsg->tcm_ifindex = ifindex;
+    /* Caller should fill in tcmsg->tcm_handle. */
+    /* Caller should fill in tcmsg->tcm_parent. */
+
+    return tcmsg;
+}
+
+int
+tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
+{
+    int error = nl_transact(NETLINK_ROUTE, request, replyp);
+    ofpbuf_uninit(request);
+    return error;
+}
+
+/* Adds or deletes a root ingress qdisc on the device with index 'ifindex'.
+ *
+ * This function is equivalent to running the following when 'add' is true:
+ *     /sbin/tc qdisc add dev <devname> handle ffff: ingress
+ *
+ * This function is equivalent to running the following when 'add' is false:
+ *     /sbin/tc qdisc del dev <devname> handle ffff: ingress
+ *
+ * where <devname> is the name of the device whose index is 'ifindex'.
+ *
+ * The configuration and stats may be seen with the following command:
+ *     /sbin/tc -s qdisc show dev <devname>
+ *
+ * Returns 0 if successful, otherwise a positive errno value.  When deleting,
+ * ENOENT and EINVAL from tc_transact() are treated as success.
+ */
+int
+tc_add_del_ingress_qdisc(int ifindex, bool add)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    int error;
+    int type = add ? RTM_NEWQDISC : RTM_DELQDISC;
+    int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0;
+
+    tcmsg = tc_make_request(ifindex, type, flags, &request);
+    tcmsg->tcm_handle = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_parent = TC_H_INGRESS;
+    nl_msg_put_string(&request, TCA_KIND, "ingress");
+    nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
+
+    error = tc_transact(&request, NULL);
+    if (error) {
+        /* If we're deleting the qdisc, don't worry about some of the
+         * error conditions. */
+        if (!add && (error == ENOENT || error == EINVAL)) {
+            return 0;
+        }
+        return error;
+    }
+
+    return 0;
+}
+
+static const struct nl_policy tca_policy[] = {
+    [TCA_KIND] = { .type = NL_A_STRING, .optional = false, },
+    [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false, },
+    [TCA_STATS] = { .type = NL_A_UNSPEC,
+                    .min_len = sizeof(struct tc_stats), .optional = true, },
+    [TCA_STATS2] = { .type = NL_A_NESTED, .optional = true, },
+};
+
+static const struct nl_policy tca_flower_policy[] = {
+    [TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
+                           .optional = true, },
+    [TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC,
+                                 .min_len = ETH_ALEN, .optional = true, },
+    [TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC,
+                                 .min_len = ETH_ALEN, .optional = true, },
+    [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC,
+                                      .min_len = ETH_ALEN,
+                                      .optional = true, },
+    [TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC,
+                                      .min_len = ETH_ALEN,
+                                      .optional = true, },
+    [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, },
+    [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
+    [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
+    [TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, },
+    [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_IPV4_DST] = {.type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC,
+                                  .min_len = sizeof(struct in6_addr),
+                                  .optional = true, },
+    [TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC,
+                                  .min_len = sizeof(struct in6_addr),
+                                  .optional = true, },
+    [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
+                                       .min_len = sizeof(struct in6_addr),
+                                       .optional = true, },
+    [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
+                                       .min_len = sizeof(struct in6_addr),
+                                       .optional = true, },
+    [TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true, },
+    [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_BE32, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_BE32, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NL_A_BE32, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_BE16,
+                                          .optional = true, },
+};
+
+/* Copies the Ethernet source/destination addresses and masks from 'attrs'
+ * into 'flower'; each pair is read only when its *_MASK attribute exists. */
+static void
+nl_parse_flower_eth(struct nlattr **attrs, struct tc_flower *flower)
+{
+    const struct eth_addr *mac;
+
+    if (attrs[TCA_FLOWER_KEY_ETH_SRC_MASK]) {
+        mac = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC], ETH_ALEN);
+        memcpy(&flower->key.src_mac, mac, sizeof flower->key.src_mac);
+        mac = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC_MASK], ETH_ALEN);
+        memcpy(&flower->mask.src_mac, mac, sizeof flower->mask.src_mac);
+    }
+    if (attrs[TCA_FLOWER_KEY_ETH_DST_MASK]) {
+        mac = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST], ETH_ALEN);
+        memcpy(&flower->key.dst_mac, mac, sizeof flower->key.dst_mac);
+        mac = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST_MASK], ETH_ALEN);
+        memcpy(&flower->mask.dst_mac, mac, sizeof flower->mask.dst_mac);
+    }
+}
+
+static void
+nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower)
+{
+    if (flower->key.eth_type != htons(ETH_P_8021Q)) {
+        return;
+    }
+
+    flower->key.encap_eth_type =
+        nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]);
+
+    if (attrs[TCA_FLOWER_KEY_VLAN_ID]) {
+        flower->key.vlan_id =
+            nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]);
+    }
+    if (attrs[TCA_FLOWER_KEY_VLAN_PRIO]) {
+        flower->key.vlan_prio =
+            nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]);
+    }
+}
+
+static void
+nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower)
+{
+    if (attrs[TCA_FLOWER_KEY_ENC_KEY_ID]) {
+        ovs_be32 id = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_KEY_ID]);
+
+        flower->tunnel.id = be32_to_be64(id);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC]) {
+        flower->tunnel.ipv4_src =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC]);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
+        flower->tunnel.ipv4_dst =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST]);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]) {
+        flower->tunnel.tp_dst =
+            nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]);
+    }
+}
+
+/* Fills 'flower->key' and 'flower->mask' with the IP protocol, IPv4/IPv6
+ * addresses and TCP/UDP ports found in 'attrs'.  Addresses and ports are read
+ * only when the corresponding *_MASK attribute is present. */
+static void
+nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower)
+{
+    const size_t ipv6_size = sizeof flower->mask.ipv6.ipv6_src;
+    struct tc_flower_key *mask = &flower->mask;
+    struct tc_flower_key *key = &flower->key;
+    uint8_t ip_proto = 0;
+
+    if (attrs[TCA_FLOWER_KEY_IP_PROTO]) {
+        ip_proto = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_PROTO]);
+        key->ip_proto = ip_proto;
+        mask->ip_proto = UINT8_MAX;
+    }
+
+    if (attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]) {
+        key->ipv4.ipv4_src = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC]);
+        mask->ipv4.ipv4_src =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]) {
+        key->ipv4.ipv4_dst = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST]);
+        mask->ipv4.ipv4_dst =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK]) {
+        const struct nlattr *key_attr = attrs[TCA_FLOWER_KEY_IPV6_SRC];
+        const struct nlattr *mask_attr = attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK];
+        const void *addr = nl_attr_get_unspec(key_attr, ipv6_size);
+        const void *addr_mask = nl_attr_get_unspec(mask_attr, ipv6_size);
+
+        memcpy(&key->ipv6.ipv6_src, addr, ipv6_size);
+        memcpy(&mask->ipv6.ipv6_src, addr_mask, ipv6_size);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV6_DST_MASK]) {
+        const struct nlattr *key_attr = attrs[TCA_FLOWER_KEY_IPV6_DST];
+        const struct nlattr *mask_attr = attrs[TCA_FLOWER_KEY_IPV6_DST_MASK];
+        const void *addr = nl_attr_get_unspec(key_attr, ipv6_size);
+        const void *addr_mask = nl_attr_get_unspec(mask_attr, ipv6_size);
+
+        memcpy(&key->ipv6.ipv6_dst, addr, ipv6_size);
+        memcpy(&mask->ipv6.ipv6_dst, addr_mask, ipv6_size);
+    }
+
+    if (ip_proto == IPPROTO_TCP) {
+        if (attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]) {
+            key->src_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC]);
+            mask->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]);
+        }
+        if (attrs[TCA_FLOWER_KEY_TCP_DST_MASK]) {
+            key->dst_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST]);
+            mask->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST_MASK]);
+        }
+    } else if (ip_proto == IPPROTO_UDP) {
+        if (attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]) {
+            key->src_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC]);
+            mask->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]);
+        }
+        if (attrs[TCA_FLOWER_KEY_UDP_DST_MASK]) {
+            key->dst_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST]);
+            mask->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST_MASK]);
+        }
+    }
+}
+
+static const struct nl_policy tunnel_key_policy[] = {
+    [TCA_TUNNEL_KEY_PARMS] = { .type = NL_A_UNSPEC,
+                               .min_len = sizeof(struct tc_tunnel_key),
+                               .optional = false, },
+    [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NL_A_BE32, .optional = true, },
+    [TCA_TUNNEL_KEY_ENC_DST_PORT] = { .type = NL_A_BE16, .optional = true, },
+    [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NL_A_BE32, .optional = true, },
+    [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NL_A_BE32, .optional = true, },
+};
+
+/* Parses the tunnel_key action attributes nested in 'options' into 'flower'.
+ * Returns 0 on success, EPROTO on a parse failure, EINVAL on unknown action. */
+static int
+nl_parse_act_tunnel_key(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *tun_attrs[ARRAY_SIZE(tunnel_key_policy)];
+    const struct tc_tunnel_key *tun;
+
+    if (!nl_parse_nested(options, tunnel_key_policy, tun_attrs,
+                         ARRAY_SIZE(tunnel_key_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse tunnel_key action options");
+        return EPROTO;
+    }
+
+    tun = nl_attr_get_unspec(tun_attrs[TCA_TUNNEL_KEY_PARMS], sizeof *tun);
+    if (tun->t_action == TCA_TUNNEL_KEY_ACT_SET) {
+        const struct nlattr *key_id = tun_attrs[TCA_TUNNEL_KEY_ENC_KEY_ID];
+        const struct nlattr *port = tun_attrs[TCA_TUNNEL_KEY_ENC_DST_PORT];
+        const struct nlattr *src = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_SRC];
+        const struct nlattr *dst = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_DST];
+
+        flower->set.set = true;
+        flower->set.ipv4_src = src ? nl_attr_get_be32(src) : 0;
+        flower->set.ipv4_dst = dst ? nl_attr_get_be32(dst) : 0;
+        flower->set.id = key_id ? be32_to_be64(nl_attr_get_be32(key_id)) : 0;
+        flower->set.tp_dst = port ? nl_attr_get_be16(port) : 0;
+    } else if (tun->t_action == TCA_TUNNEL_KEY_ACT_RELEASE) {
+        flower->tunnel.tunnel = true;
+    } else {
+        VLOG_ERR_RL(&parse_err, "unknown tunnel actions: %d, %d",
+                    tun->action, tun->t_action);
+        return EINVAL;
+    }
+    return 0;
+}
+
+static const struct nl_policy gact_policy[] = {
+    [TCA_GACT_PARMS] = { .type = NL_A_UNSPEC,
+                         .min_len = sizeof(struct tc_gact),
+                         .optional = false, },
+    [TCA_GACT_TM] = { .type = NL_A_UNSPEC,
+                      .min_len = sizeof(struct tcf_t),
+                      .optional = false, },
+};
+
+/* Sets 'flower->lastused' to the current time_msec() minus the 'lastuse'
+ * value from 'tm' scaled by 10.
+ * NOTE(review): the scaling presumably converts 10 ms ticks to ms; confirm. */
+static void
+nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
+{
+    flower->lastused = time_msec() - tm->lastuse * 10;
+}
+
+/* Parses the gact action attributes nested in 'options'.  Only TC_ACT_SHOT
+ * (drop) is accepted; the action's tcf_t is then handed to nl_parse_tcf().
+ *
+ * Returns 0 on success, EPROTO on a parse failure, EINVAL on any other gact
+ * action. */
+static int
+nl_parse_act_drop(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *attrs[ARRAY_SIZE(gact_policy)];
+    const struct tc_gact *gact;
+    const struct tcf_t *tcf;
+
+    if (!nl_parse_nested(options, gact_policy, attrs,
+                         ARRAY_SIZE(gact_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse gact action options");
+        return EPROTO;
+    }
+
+    gact = nl_attr_get_unspec(attrs[TCA_GACT_PARMS], sizeof *gact);
+    if (gact->action != TC_ACT_SHOT) {
+        VLOG_ERR_RL(&parse_err, "unknown gact action: %d", gact->action);
+        return EINVAL;
+    }
+
+    tcf = nl_attr_get_unspec(attrs[TCA_GACT_TM], sizeof *tcf);
+    nl_parse_tcf(tcf, flower);
+    return 0;
+}
+
+static const struct nl_policy mirred_policy[] = {
+    [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
+                           .min_len = sizeof(struct tc_mirred),
+                           .optional = false, },
+    [TCA_MIRRED_TM] = { .type = NL_A_UNSPEC,
+                        .min_len = sizeof(struct tcf_t),
+                        .optional = false, },
+};
+
+/* Parses the mirred action attributes nested in 'options'.  Only an egress
+ * redirect (TC_ACT_STOLEN with TCA_EGRESS_REDIR) is accepted; its ifindex is
+ * stored in 'flower->ifindex_out' and its tcf_t is handed to nl_parse_tcf().
+ *
+ * Returns 0 on success, EPROTO on a parse failure, EINVAL on any other mirred
+ * action. */
+static int
+nl_parse_act_mirred(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *attrs[ARRAY_SIZE(mirred_policy)];
+    const struct tc_mirred *mirred;
+    const struct tcf_t *tcf;
+
+    if (!nl_parse_nested(options, mirred_policy, attrs,
+                         ARRAY_SIZE(mirred_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse mirred action options");
+        return EPROTO;
+    }
+
+    mirred = nl_attr_get_unspec(attrs[TCA_MIRRED_PARMS], sizeof *mirred);
+    if (mirred->action != TC_ACT_STOLEN
+        || mirred->eaction != TCA_EGRESS_REDIR) {
+        VLOG_ERR_RL(&parse_err, "unknown mirred action: %d, %d, %d",
+                    mirred->action, mirred->eaction, mirred->ifindex);
+        return EINVAL;
+    }
+
+    flower->ifindex_out = mirred->ifindex;
+    tcf = nl_attr_get_unspec(attrs[TCA_MIRRED_TM], sizeof *tcf);
+    nl_parse_tcf(tcf, flower);
+
+    return 0;
+}
+
+static const struct nl_policy vlan_policy[] = {
+    [TCA_VLAN_PARMS] = { .type = NL_A_UNSPEC,
+                         .min_len = sizeof(struct tc_vlan),
+                         .optional = false, },
+    [TCA_VLAN_PUSH_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
+    [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NL_A_U16, .optional = true, },
+    [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NL_A_U8, .optional = true, },
+};
+
+/* Parses the vlan action attributes nested in 'options' into 'flower'.  The
+ * push id and priority are optional in vlan_policy, so absent ones read as 0.
+ * Returns 0 on success, EPROTO on a parse failure, EINVAL on unknown action. */
+static int
+nl_parse_act_vlan(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *vlan_attrs[ARRAY_SIZE(vlan_policy)];
+    const struct tc_vlan *v;
+
+    if (!nl_parse_nested(options, vlan_policy, vlan_attrs,
+                         ARRAY_SIZE(vlan_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse vlan action options");
+        return EPROTO;
+    }
+    v = nl_attr_get_unspec(vlan_attrs[TCA_VLAN_PARMS], sizeof *v);
+    if (v->v_action == TCA_VLAN_ACT_PUSH) {
+        struct nlattr *vlan_id = vlan_attrs[TCA_VLAN_PUSH_VLAN_ID];
+        struct nlattr *vlan_prio = vlan_attrs[TCA_VLAN_PUSH_VLAN_PRIORITY];
+
+        flower->vlan_push_id = vlan_id ? nl_attr_get_u16(vlan_id) : 0;
+        flower->vlan_push_prio = vlan_prio ? nl_attr_get_u8(vlan_prio) : 0;
+    } else if (v->v_action == TCA_VLAN_ACT_POP) {
+        flower->vlan_pop = 1;
+    } else {
+        VLOG_ERR_RL(&parse_err, "unknown vlan action: %d, %d",
+                    v->action, v->v_action);
+        return EINVAL;
+    }
+    return 0;
+}
+
+/* Attribute policy for a single tc action: its kind string, its nested
+ * kind-specific options and its nested statistics are all required. */
+static const struct nl_policy act_policy[] = {
+    [TCA_ACT_KIND] = {
+        .type = NL_A_STRING,
+        .optional = false,
+    },
+    [TCA_ACT_OPTIONS] = {
+        .type = NL_A_NESTED,
+        .optional = false,
+    },
+    [TCA_ACT_STATS] = {
+        .type = NL_A_NESTED,
+        .optional = false,
+    },
+};
+
+/* Attribute policy for an action's nested statistics: the basic counters
+ * (struct gnet_stats_basic) are required. */
+static const struct nl_policy stats_policy[] = {
+    [TCA_STATS_BASIC] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct gnet_stats_basic),
+        .optional = false,
+    },
+};
+
+/* Parses one tc action (a nested attribute holding the action's kind,
+ * options and stats) into 'flower'.
+ *
+ * The action kind selects the options parser ("gact", "mirred", "vlan" or
+ * "tunnel_key"); the action's basic statistics are then stored in
+ * 'flower->stats'.
+ *
+ * Returns 0 on success, EPROTO if the action or its stats are malformed,
+ * EINVAL for an unknown action kind, or the error reported by the
+ * kind-specific options parser. */
+static int
+nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
+{
+    struct nlattr *act_options;
+    struct nlattr *act_stats;
+    const struct nlattr *stats_basic;
+    const char *act_kind;
+    struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
+    struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
+    struct ovs_flow_stats *stats = &flower->stats;
+    const struct gnet_stats_basic *bs;
+    int err;
+
+    if (!nl_parse_nested(action, act_policy, action_attrs,
+                         ARRAY_SIZE(act_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse single action options");
+        return EPROTO;
+    }
+
+    act_kind = nl_attr_get_string(action_attrs[TCA_ACT_KIND]);
+    act_options = action_attrs[TCA_ACT_OPTIONS];
+
+    if (!strcmp(act_kind, "gact")) {
+        err = nl_parse_act_drop(act_options, flower);
+    } else if (!strcmp(act_kind, "mirred")) {
+        err = nl_parse_act_mirred(act_options, flower);
+    } else if (!strcmp(act_kind, "vlan")) {
+        err = nl_parse_act_vlan(act_options, flower);
+    } else if (!strcmp(act_kind, "tunnel_key")) {
+        err = nl_parse_act_tunnel_key(act_options, flower);
+    } else {
+        VLOG_ERR_RL(&parse_err, "unknown tc action kind: %s", act_kind);
+        err = EINVAL;
+    }
+
+    /* An action whose options could not be parsed must not be reported as
+     * success, otherwise the caller would use a partially filled flower. */
+    if (err) {
+        return err;
+    }
+
+    act_stats = action_attrs[TCA_ACT_STATS];
+
+    if (!nl_parse_nested(act_stats, stats_policy, stats_attrs,
+                         ARRAY_SIZE(stats_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse action stats policy");
+        return EPROTO;
+    }
+
+    stats_basic = stats_attrs[TCA_STATS_BASIC];
+    bs = nl_attr_get_unspec(stats_basic, sizeof *bs);
+
+    /* The packet count goes into the low word only; the byte count is
+     * split into its high and low 32-bit halves. */
+    stats->n_packets.lo = bs->packets;
+    stats->n_packets.hi = 0;
+    stats->n_bytes.hi = bs->bytes >> 32;
+    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;
+
+    return 0;
+}
+
+#define TCA_ACT_MIN_PRIO 1
+
+/* Parses every action nested under TCA_FLOWER_ACT in 'attrs' into 'flower'.
+ *
+ * Actions are keyed by their order, from TCA_ACT_MIN_PRIO up to
+ * TCA_ACT_MAX_PRIO, and each order that is present is handed to
+ * nl_parse_single_action().  Returns 0 on success, EPROTO if the list of
+ * actions cannot be parsed, or the first error from a single action. */
+static int
+nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower)
+{
+    static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO + 1] = {};
+    struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
+    const size_t n_orders = ARRAY_SIZE(actions_orders_policy);
+    const struct nlattr *actions = attrs[TCA_FLOWER_ACT];
+    size_t order;
+
+    /* Every possible order is an optional nested attribute. */
+    for (order = TCA_ACT_MIN_PRIO; order < n_orders; order++) {
+        struct nl_policy *policy = &actions_orders_policy[order];
+
+        policy->type = NL_A_NESTED;
+        policy->optional = true;
+    }
+
+    if (!nl_parse_nested(actions, actions_orders_policy, actions_orders,
+                         n_orders)) {
+        VLOG_ERR_RL(&parse_err, "failed to parse flower order of actions");
+        return EPROTO;
+    }
+
+    for (order = TCA_ACT_MIN_PRIO; order < n_orders; order++) {
+        int err;
+
+        if (!actions_orders[order]) {
+            continue;
+        }
+
+        err = nl_parse_single_action(actions_orders[order], flower);
+        if (err) {
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+/* Parses the nested flower classifier options 'nl_options' into 'flower':
+ * first the ethernet, vlan, ip and tunnel parts, then the actions.
+ *
+ * Returns EPROTO if the options do not satisfy 'tca_flower_policy',
+ * otherwise the result of parsing the actions. */
+static int
+nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower)
+{
+    struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)];
+    bool parsed;
+
+    parsed = nl_parse_nested(nl_options, tca_flower_policy, attrs,
+                             ARRAY_SIZE(tca_flower_policy));
+    if (!parsed) {
+        VLOG_ERR_RL(&parse_err, "failed to parse flower classifier options");
+        return EPROTO;
+    }
+
+    nl_parse_flower_eth(attrs, flower);
+    nl_parse_flower_vlan(attrs, flower);
+    nl_parse_flower_ip(attrs, flower);
+    nl_parse_flower_tunnel(attrs, flower);
+
+    return nl_parse_flower_actions(attrs, flower);
+}
+
+/* Converts the netlink filter message in 'reply' into 'flower'.
+ *
+ * 'flower' is zeroed first.  The handle, priority and ethernet type come
+ * from the tcmsg header; everything else comes from the flower options.
+ *
+ * Returns 0 on success, EPROTO if the message is too short, malformed or
+ * not a flower filter, EAGAIN if the message carries a zero handle, or the
+ * error from parsing the flower options. */
+int
+parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tc_flower *flower)
+{
+    struct nlattr *ta[ARRAY_SIZE(tca_policy)];
+    const char *kind;
+    struct tcmsg *tc;
+
+    memset(flower, 0, sizeof *flower);
+    if (reply->size < NLMSG_HDRLEN + (sizeof *tc)) {
+        return EPROTO;
+    }
+
+    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+
+    /* tcm_info packs the priority (major) and ethernet type (minor). */
+    flower->prio = tc_get_major(tc->tcm_info);
+    flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
+    flower->mask.eth_type = OVS_BE16_MAX;
+    flower->handle = tc->tcm_handle;
+
+    if (!flower->handle) {
+        return EAGAIN;
+    }
+
+    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
+                         tca_policy, ta, ARRAY_SIZE(ta))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse tca policy");
+        return EPROTO;
+    }
+
+    kind = nl_attr_get_string(ta[TCA_KIND]);
+    if (strcmp(kind, "flower") != 0) {
+        VLOG_ERR_RL(&parse_err, "failed to parse filter: not flower");
+        return EPROTO;
+    }
+
+    return nl_parse_flower_options(ta[TCA_OPTIONS], flower);
+}
+
+/* Starts a netlink dump, tracked by 'dump', of the filters attached under
+ * parent handle ffff:0 on the device with index 'ifindex'.
+ *
+ * Always returns 0. */
+int
+tc_dump_flower_start(int ifindex, struct nl_dump *dump)
+{
+    struct ofpbuf rq;
+    struct tcmsg *msg;
+
+    msg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP, &rq);
+    msg->tcm_handle = 0;
+    msg->tcm_info = TC_H_UNSPEC;
+    msg->tcm_parent = tc_make_handle(0xffff, 0);
+
+    nl_dump_start(dump, NETLINK_ROUTE, &rq);
+    ofpbuf_uninit(&rq);
+
+    return 0;
+}
+
+/* Sends a filter delete request with an unspecified priority and handle
+ * for parent handle ffff:0 on the device with index 'ifindex'.
+ *
+ * Returns the result of the netlink transaction. */
+int
+tc_flush(int ifindex)
+{
+    struct ofpbuf rq;
+    struct tcmsg *msg;
+
+    msg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ACK, &rq);
+    msg->tcm_info = TC_H_UNSPEC;
+    msg->tcm_parent = tc_make_handle(0xffff, 0);
+
+    return tc_transact(&rq, NULL);
+}
+
+/* Deletes the filter identified by 'prio' and 'handle' under parent handle
+ * ffff:0 on the device with index 'ifindex'.
+ *
+ * Returns 0 on success, otherwise the error from the netlink transaction. */
+int
+tc_del_filter(int ifindex, int prio, int handle)
+{
+    struct ofpbuf *reply;
+    struct ofpbuf rq;
+    struct tcmsg *msg;
+    int error;
+
+    msg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ECHO, &rq);
+    msg->tcm_handle = handle;
+    msg->tcm_info = tc_make_handle(prio, 0);
+    msg->tcm_parent = tc_make_handle(0xffff, 0);
+
+    error = tc_transact(&rq, &reply);
+    if (error) {
+        return error;
+    }
+
+    /* The echoed reply carries nothing that is needed here. */
+    ofpbuf_delete(reply);
+    return 0;
+}
+
+/* Fetches the filter identified by 'prio' and 'handle' under parent handle
+ * ffff:0 on the device with index 'ifindex' and parses it into 'flower'.
+ *
+ * Returns 0 on success, otherwise the error from the netlink transaction
+ * or from parsing the reply. */
+int
+tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower)
+{
+    struct ofpbuf *reply;
+    struct ofpbuf rq;
+    struct tcmsg *msg;
+    int error;
+
+    msg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_ECHO, &rq);
+    msg->tcm_handle = handle;
+    msg->tcm_info = tc_make_handle(prio, 0);
+    msg->tcm_parent = tc_make_handle(0xffff, 0);
+
+    error = tc_transact(&rq, &reply);
+    if (!error) {
+        error = parse_netlink_to_tc_flower(reply, flower);
+        ofpbuf_delete(reply);
+    }
+
+    return error;
+}
+
+/* Appends to 'request' the kind and options of a "vlan" action that pushes
+ * a VLAN header with id 'vid' and priority 'prio'. */
+static void
+nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio)
+{
+    struct tc_vlan parm = {
+        .action = TC_ACT_PIPE,
+        .v_action = TCA_VLAN_ACT_PUSH,
+    };
+    size_t opts_offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
+
+    opts_offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_VLAN_PARMS, &parm, sizeof parm);
+    nl_msg_put_u16(request, TCA_VLAN_PUSH_VLAN_ID, vid);
+    nl_msg_put_u8(request, TCA_VLAN_PUSH_VLAN_PRIORITY, prio);
+    nl_msg_end_nested(request, opts_offset);
+}
+
+/* Appends to 'request' the kind and options of a "vlan" action that pops
+ * the VLAN header. */
+static void
+nl_msg_put_act_pop_vlan(struct ofpbuf *request)
+{
+    struct tc_vlan parm = {
+        .action = TC_ACT_PIPE,
+        .v_action = TCA_VLAN_ACT_POP,
+    };
+    size_t opts_offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
+
+    opts_offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_VLAN_PARMS, &parm, sizeof parm);
+    nl_msg_end_nested(request, opts_offset);
+}
+
+/* Appends to 'request' the kind and options of a "tunnel_key" action that
+ * releases the tunnel key. */
+static void
+nl_msg_put_act_tunnel_key_release(struct ofpbuf *request)
+{
+    struct tc_tunnel_key tun = {
+        .action = TC_ACT_PIPE,
+        .t_action = TCA_TUNNEL_KEY_ACT_RELEASE,
+    };
+    size_t opts_offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key");
+
+    opts_offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun);
+    nl_msg_end_nested(request, opts_offset);
+}
+
+/* Appends to 'request' the kind and options of a "tunnel_key" action that
+ * sets the encapsulation key id, IPv4 source and destination addresses and
+ * destination port.  'id' is narrowed to 32 bits with be64_to_be32(). */
+static void
+nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, ovs_be64 id,
+                                ovs_be32 ipv4_src, ovs_be32 ipv4_dst,
+                                ovs_be16 tp_dst)
+{
+    struct tc_tunnel_key tun = {
+        .action = TC_ACT_PIPE,
+        .t_action = TCA_TUNNEL_KEY_ACT_SET,
+    };
+    ovs_be32 key_id = be64_to_be32(id);
+    size_t opts_offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key");
+
+    opts_offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun);
+    nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id);
+    nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src);
+    nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst);
+    nl_msg_put_be16(request, TCA_TUNNEL_KEY_ENC_DST_PORT, tp_dst);
+    nl_msg_end_nested(request, opts_offset);
+}
+
+/* Appends to 'request' the kind and options of a "gact" action whose
+ * verdict is TC_ACT_SHOT. */
+static void
+nl_msg_put_act_drop(struct ofpbuf *request)
+{
+    struct tc_gact parm = { .action = TC_ACT_SHOT };
+    size_t opts_offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "gact");
+
+    opts_offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_GACT_PARMS, &parm, sizeof parm);
+    nl_msg_end_nested(request, opts_offset);
+}
+
+/* Appends to 'request' the kind and options of a "mirred" action that
+ * redirects on egress to the device with index 'ifindex'. */
+static void
+nl_msg_put_act_redirect(struct ofpbuf *request, int ifindex)
+{
+    struct tc_mirred parm = {
+        .action = TC_ACT_STOLEN,
+        .eaction = TCA_EGRESS_REDIR,
+        .ifindex = ifindex,
+    };
+    size_t opts_offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "mirred");
+
+    opts_offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_MIRRED_PARMS, &parm, sizeof parm);
+    nl_msg_end_nested(request, opts_offset);
+}
+
+/* Appends the TCA_FLOWER_ACT attribute describing the actions of 'flower'
+ * to 'request'.
+ *
+ * Actions are numbered from 1 in this order: tunnel key set, tunnel key
+ * release, vlan pop, vlan push, and finally either a redirect to
+ * 'flower->ifindex_out' or, when that is zero, a drop. */
+static void
+nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
+{
+    uint16_t order = 1;
+    size_t acts_offset;
+    size_t act_offset;
+
+    acts_offset = nl_msg_start_nested(request, TCA_FLOWER_ACT);
+
+    if (flower->set.set) {
+        act_offset = nl_msg_start_nested(request, order++);
+        nl_msg_put_act_tunnel_key_set(request, flower->set.id,
+                                      flower->set.ipv4_src,
+                                      flower->set.ipv4_dst,
+                                      flower->set.tp_dst);
+        nl_msg_end_nested(request, act_offset);
+    }
+
+    if (flower->tunnel.tunnel) {
+        act_offset = nl_msg_start_nested(request, order++);
+        nl_msg_put_act_tunnel_key_release(request);
+        nl_msg_end_nested(request, act_offset);
+    }
+
+    if (flower->vlan_pop) {
+        act_offset = nl_msg_start_nested(request, order++);
+        nl_msg_put_act_pop_vlan(request);
+        nl_msg_end_nested(request, act_offset);
+    }
+
+    if (flower->vlan_push_id) {
+        act_offset = nl_msg_start_nested(request, order++);
+        nl_msg_put_act_push_vlan(request, flower->vlan_push_id,
+                                 flower->vlan_push_prio);
+        nl_msg_end_nested(request, act_offset);
+    }
+
+    /* The last action is always present: redirect or drop. */
+    act_offset = nl_msg_start_nested(request, order++);
+    if (flower->ifindex_out) {
+        nl_msg_put_act_redirect(request, flower->ifindex_out);
+    } else {
+        nl_msg_put_act_drop(request);
+    }
+    nl_msg_end_nested(request, act_offset);
+
+    nl_msg_end_nested(request, acts_offset);
+}
+
+/* Appends a key attribute of 'type' holding 'len' bytes of 'data' to
+ * 'request', preceded by its mask attribute.
+ *
+ * When 'mask_type' is TCA_FLOWER_UNSPEC no mask is written and the key is
+ * always written.  Otherwise an all-zeros mask suppresses both the mask
+ * and the key. */
+static void
+nl_msg_put_masked_value(struct ofpbuf *request, uint16_t type,
+                        uint16_t mask_type, const void *data,
+                        const void *mask_data, size_t len)
+{
+    bool has_mask = mask_type != TCA_FLOWER_UNSPEC;
+
+    if (has_mask && is_all_zeros(mask_data, len)) {
+        return;
+    }
+
+    if (has_mask) {
+        nl_msg_put_unspec(request, mask_type, mask_data, len);
+    }
+    nl_msg_put_unspec(request, type, data, len);
+}
+
+/* Appends the tunnel match of 'flower' to 'request': encapsulation key id
+ * (narrowed to 32 bits), IPv4 source and destination addresses and UDP
+ * destination port. */
+static void
+nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower)
+{
+    ovs_be32 key_id = be64_to_be32(flower->tunnel.id);
+
+    nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, key_id);
+    nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC,
+                    flower->tunnel.ipv4_src);
+    nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST,
+                    flower->tunnel.ipv4_dst);
+    nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+                    flower->tunnel.tp_dst);
+}
+
+/* Appends the flower classifier options for 'flower' to 'request'.
+ *
+ * Attributes are written in this order: ethernet addresses, IP protocol
+ * and UDP/TCP ports (IPv4 and IPv6 only), IPv4 or IPv6 addresses, ethernet
+ * type, VLAN fields (802.1Q only), flags (always 0), tunnel match (when
+ * 'flower->tunnel.tunnel' is set) and finally the actions. */
+static void
+nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
+{
+    const struct tc_flower_key *key = &flower->key;
+    const struct tc_flower_key *mask = &flower->mask;
+    uint16_t host_eth_type = ntohs(key->eth_type);
+    bool is_ipv4 = host_eth_type == ETH_P_IP;
+    bool is_ipv6 = host_eth_type == ETH_P_IPV6;
+
+    nl_msg_put_masked_value(request, TCA_FLOWER_KEY_ETH_DST,
+                            TCA_FLOWER_KEY_ETH_DST_MASK,
+                            &key->dst_mac, &mask->dst_mac, ETH_ALEN);
+    nl_msg_put_masked_value(request, TCA_FLOWER_KEY_ETH_SRC,
+                            TCA_FLOWER_KEY_ETH_SRC_MASK,
+                            &key->src_mac, &mask->src_mac, ETH_ALEN);
+
+    if (is_ipv4 || is_ipv6) {
+        bool is_udp = key->ip_proto == IPPROTO_UDP;
+        bool is_tcp = key->ip_proto == IPPROTO_TCP;
+
+        if (mask->ip_proto && key->ip_proto) {
+            nl_msg_put_u8(request, TCA_FLOWER_KEY_IP_PROTO, key->ip_proto);
+        }
+
+        /* UDP and TCP share the port fields and differ only in the
+         * attribute types used on the wire. */
+        if (is_udp || is_tcp) {
+            nl_msg_put_masked_value(request,
+                                    is_udp ? TCA_FLOWER_KEY_UDP_SRC
+                                           : TCA_FLOWER_KEY_TCP_SRC,
+                                    is_udp ? TCA_FLOWER_KEY_UDP_SRC_MASK
+                                           : TCA_FLOWER_KEY_TCP_SRC_MASK,
+                                    &key->src_port, &mask->src_port, 2);
+            nl_msg_put_masked_value(request,
+                                    is_udp ? TCA_FLOWER_KEY_UDP_DST
+                                           : TCA_FLOWER_KEY_TCP_DST,
+                                    is_udp ? TCA_FLOWER_KEY_UDP_DST_MASK
+                                           : TCA_FLOWER_KEY_TCP_DST_MASK,
+                                    &key->dst_port, &mask->dst_port, 2);
+        }
+    }
+
+    if (is_ipv4) {
+        nl_msg_put_masked_value(request, TCA_FLOWER_KEY_IPV4_SRC,
+                                TCA_FLOWER_KEY_IPV4_SRC_MASK,
+                                &key->ipv4.ipv4_src, &mask->ipv4.ipv4_src,
+                                sizeof key->ipv4.ipv4_src);
+        nl_msg_put_masked_value(request, TCA_FLOWER_KEY_IPV4_DST,
+                                TCA_FLOWER_KEY_IPV4_DST_MASK,
+                                &key->ipv4.ipv4_dst, &mask->ipv4.ipv4_dst,
+                                sizeof key->ipv4.ipv4_dst);
+    } else if (is_ipv6) {
+        nl_msg_put_masked_value(request, TCA_FLOWER_KEY_IPV6_SRC,
+                                TCA_FLOWER_KEY_IPV6_SRC_MASK,
+                                &key->ipv6.ipv6_src, &mask->ipv6.ipv6_src,
+                                sizeof key->ipv6.ipv6_src);
+        nl_msg_put_masked_value(request, TCA_FLOWER_KEY_IPV6_DST,
+                                TCA_FLOWER_KEY_IPV6_DST_MASK,
+                                &key->ipv6.ipv6_dst, &mask->ipv6.ipv6_dst,
+                                sizeof key->ipv6.ipv6_dst);
+    }
+
+    nl_msg_put_be16(request, TCA_FLOWER_KEY_ETH_TYPE, key->eth_type);
+
+    if (host_eth_type == ETH_P_8021Q) {
+        if (key->vlan_id || key->vlan_prio) {
+            nl_msg_put_u16(request, TCA_FLOWER_KEY_VLAN_ID, key->vlan_id);
+            nl_msg_put_u8(request, TCA_FLOWER_KEY_VLAN_PRIO,
+                          key->vlan_prio);
+        }
+        if (key->encap_eth_type) {
+            nl_msg_put_be16(request, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                            key->encap_eth_type);
+        }
+    }
+
+    nl_msg_put_u32(request, TCA_FLOWER_FLAGS, 0);
+
+    if (flower->tunnel.tunnel) {
+        nl_msg_put_flower_tunnel(request, flower);
+    }
+
+    nl_msg_put_flower_acts(request, flower);
+}
+
+/* Sends an RTM_NEWTFILTER request (NLM_F_CREATE | NLM_F_ECHO) that installs
+ * 'flower' under parent handle ffff:0 on the device with index 'ifindex',
+ * using 'prio' and 'handle'.
+ *
+ * On success the priority and handle echoed back in the reply are stored
+ * in 'flower' and 0 is returned; otherwise the error from the netlink
+ * transaction is returned. */
+int
+tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
+                  struct tc_flower *flower)
+{
+    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
+    struct ofpbuf *reply;
+    struct ofpbuf request;
+    struct tcmsg *echoed;
+    struct tcmsg *tcmsg;
+    size_t opts_offset;
+    int error;
+
+    tcmsg = tc_make_request(ifindex, RTM_NEWTFILTER,
+                            NLM_F_CREATE | NLM_F_ECHO, &request);
+    tcmsg->tcm_handle = handle;
+    tcmsg->tcm_info = tc_make_handle(prio, eth_type);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+
+    nl_msg_put_string(&request, TCA_KIND, "flower");
+    opts_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
+    nl_msg_put_flower_options(&request, flower);
+    nl_msg_end_nested(&request, opts_offset);
+
+    error = tc_transact(&request, &reply);
+    if (error) {
+        return error;
+    }
+
+    echoed = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *echoed);
+    flower->prio = tc_get_major(echoed->tcm_info);
+    flower->handle = echoed->tcm_handle;
+    ofpbuf_delete(reply);
+
+    return 0;
+}
diff --git a/lib/tc.h b/lib/tc.h
new file mode 100644
index 0000000..5ca6c55
--- /dev/null
+++ b/lib/tc.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TC_H
+#define TC_H 1
+
+#include <linux/pkt_cls.h>
+#include <linux/pkt_sched.h>
+#include "odp-netlink.h"
+#include "netlink-socket.h"
+
+#define TC_POLICY_DEFAULT "none"
+
+/* Generic tc helpers shared with netdev-linux.c. */
+
+/* Combines 'major' and 'minor' into a single tc handle.  tc.c uses this
+ * both for the parent handle (ffff:0) and to pack a filter's priority and
+ * ethernet type into tcm_info. */
+unsigned int tc_make_handle(unsigned int major, unsigned int minor);
+/* Extract the major and minor parts of a tc handle. */
+unsigned int tc_get_major(unsigned int handle);
+unsigned int tc_get_minor(unsigned int handle);
+/* Initializes 'request' as a tc netlink message of the given 'type' and
+ * 'flags' for the device with index 'ifindex' and returns its tcmsg header
+ * so that the caller can fill in parent, info and handle. */
+struct tcmsg *tc_make_request(int ifindex, int type, unsigned int flags,
+                              struct ofpbuf *request);
+/* Sends 'request' and returns 0 on success or an error code.  'replyp' may
+ * be NULL when the reply is not wanted; otherwise the caller frees
+ * '*replyp' with ofpbuf_delete() on success.
+ * NOTE(review): callers in tc.c never uninit 'request' afterwards, so this
+ * presumably consumes it -- confirm against the definition. */
+int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
+/* NOTE(review): by its name, adds ('add' true) or deletes the ingress qdisc
+ * of the device with index 'ifindex' -- confirm against the definition. */
+int tc_add_del_ingress_qdisc(int ifindex, bool add);
+
+/* Match fields of a flower filter.  struct tc_flower holds two of these:
+ * one with the values to match ('key') and one with their masks ('mask'). */
+struct tc_flower_key {
+    ovs_be16 eth_type;          /* Ethernet type, network byte order. */
+    uint8_t ip_proto;           /* IP protocol; sent for IPv4 and IPv6. */
+
+    struct eth_addr dst_mac;
+    struct eth_addr src_mac;
+
+    /* Transport ports; sent as UDP or TCP ports depending on 'ip_proto'. */
+    ovs_be16 src_port;
+    ovs_be16 dst_port;
+
+    /* VLAN id and priority; sent only when 'eth_type' is 802.1Q. */
+    uint16_t vlan_id;
+    uint8_t vlan_prio;
+
+    /* Ethernet type inside the VLAN header; sent as
+     * TCA_FLOWER_KEY_VLAN_ETH_TYPE when 'eth_type' is 802.1Q. */
+    ovs_be16 encap_eth_type;
+    /* NOTE(review): 'encap_ip_proto' and the encap_ipv4/encap_ipv6 union
+     * are presumably the IP fields inside a VLAN header; the request
+     * builder in tc.c does not send them -- confirm against the parser. */
+    uint8_t encap_ip_proto;
+    union {
+        struct {
+            ovs_be32 ipv4_src;
+            ovs_be32 ipv4_dst;
+        } encap_ipv4;
+        struct {
+            ovs_be32 ipv6_src[4];
+            ovs_be32 ipv6_dst[4];
+        } encap_ipv6;
+    };
+
+    /* IP addresses; 'ipv4' is sent when 'eth_type' is IPv4 and 'ipv6' when
+     * it is IPv6. */
+    union {
+        struct {
+            ovs_be32 ipv4_src;
+            ovs_be32 ipv4_dst;
+        } ipv4;
+        struct {
+            ovs_be32 ipv6_src[4];
+            ovs_be32 ipv6_dst[4];
+        } ipv6;
+    };
+};
+
+/* A flower filter: its identity, its match ('key' under 'mask'), its
+ * actions and its statistics. */
+struct tc_flower {
+    uint32_t handle;            /* Filter handle (tcm_handle). */
+    uint32_t prio;              /* Filter priority (major part of tcm_info). */
+
+    struct tc_flower_key key;   /* Values to match. */
+    struct tc_flower_key mask;  /* Masks for 'key'. */
+
+    /* VLAN actions.  A nonzero 'vlan_pop' requests a vlan pop action; a
+     * nonzero 'vlan_push_id' requests a vlan push action with that id and
+     * 'vlan_push_prio'. */
+    uint8_t vlan_pop;
+    uint16_t vlan_push_id;
+    uint8_t vlan_push_prio;
+
+    /* NOTE(review): 'ifindex' is presumably the device the filter is
+     * attached to; the code visible in tc.c does not use it -- confirm. */
+    int ifindex;
+    /* Output device index.  Nonzero requests a mirred egress redirect to
+     * that device; zero requests a drop action. */
+    int ifindex_out;
+
+    struct ovs_flow_stats stats;    /* From the action's basic stats. */
+    /* NOTE(review): presumably filled in from the action's tcf_t timing
+     * data by nl_parse_tcf() -- confirm. */
+    uint64_t lastused;
+
+    /* Tunnel key set action: when 'set' is true, a tunnel_key set action
+     * is requested with these encapsulation parameters.  'tp_src' is not
+     * sent by the request builder in tc.c. */
+    struct {
+        bool set;
+        ovs_be32 ipv4_src;
+        ovs_be32 ipv4_dst;
+        ovs_be64 id;
+        ovs_be16 tp_src;
+        ovs_be16 tp_dst;
+    } set;
+
+    /* Tunnel match: when 'tunnel' is true, the filter matches on these
+     * encapsulation fields and a tunnel_key release action is requested.
+     * 'tp_src' is not sent by the request builder in tc.c. */
+    struct {
+        bool tunnel;
+        ovs_be32 ipv4_src;
+        ovs_be32 ipv4_dst;
+        ovs_be64 id;
+        ovs_be16 tp_src;
+        ovs_be16 tp_dst;
+    } tunnel;
+};
+
+/* Flower filter operations.  All of them act on parent handle ffff:0 of
+ * the device with index 'ifindex' and return 0 on success or an error
+ * code ('tc_dump_flower_start' always returns 0). */
+
+/* Installs 'flower' with the given 'prio' and 'handle'; on success stores
+ * the priority and handle echoed back in the reply in 'flower'. */
+int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
+                      struct tc_flower *flower);
+/* Deletes the filter identified by 'prio' and 'handle'. */
+int tc_del_filter(int ifindex, int prio, int handle);
+/* Fetches the filter identified by 'prio' and 'handle' into 'flower'. */
+int tc_get_flower(int ifindex, int prio, int handle,
+                  struct tc_flower *flower);
+/* Sends a filter delete request with unspecified priority and handle. */
+int tc_flush(int ifindex);
+/* Starts a netlink dump of the device's filters, tracked by 'dump'. */
+int tc_dump_flower_start(int ifindex, struct nl_dump *dump);
+/* Parses one filter message in 'reply' into 'flower', which is zeroed
+ * first.  Returns EAGAIN for a message with a zero handle and EPROTO for
+ * a malformed or non-flower message. */
+int parse_netlink_to_tc_flower(struct ofpbuf *reply,
+                               struct tc_flower *flower);
+
+#endif /* tc.h */
-- 
1.7.1



More information about the dev mailing list