[ovs-dev] [PATCH OVS 3/4] ovs-tc: allow offloading TC rules to egress qdiscs

John Hurley john.hurley at netronome.com
Tue Apr 2 17:15:23 UTC 2019


On Tue, Apr 2, 2019 at 4:20 PM Roi Dayan <roid at mellanox.com> wrote:
>
>
>
> On 02/04/2019 12:27, John Hurley wrote:
> > Offloading rules to a TC datapath only allows the creating of ingress hook
> > qdiscs and the application of filters to these. However, there may be
> > certain situations where an egress qdisc is more applicable (e.g. when
> > offloading to TC rules applied to OvS internal ports).
> >
> > Extend the TC API in OvS to allow the creation of egress qdiscs and to add
> > or interact with flower filters applied to these.
> >
> > Signed-off-by: John Hurley <john.hurley at netronome.com>
> > Reviewed-by: Simon Horman <simon.horman at netronome.com>
> > ---
> >  lib/netdev-linux.c       | 16 +++++++-------
> >  lib/netdev-tc-offloads.c | 32 +++++++++++++--------------
> >  lib/tc.c                 | 56 +++++++++++++++++++++++++++++++++---------------
> >  lib/tc.h                 | 19 ++++++++++------
> >  4 files changed, 76 insertions(+), 47 deletions(-)
> >
> > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> > index deedc69..3e9c450 100644
> > --- a/lib/netdev-linux.c
> > +++ b/lib/netdev-linux.c
> > @@ -747,10 +747,10 @@ netdev_linux_update_lag(struct rtnetlink_change *change)
> >                  lag->node = shash_add(&lag_shash, change->ifname, lag);
> >
> >                  /* delete ingress block in case it exists */
> > -                tc_add_del_ingress_qdisc(change->if_index, false, 0);
> > +                tc_add_del_qdisc(change->if_index, false, 0, false);
> >                  /* LAG master is linux netdev so add slave to same block. */
> > -                error = tc_add_del_ingress_qdisc(change->if_index, true,
> > -                                                 block_id);
> > +                error = tc_add_del_qdisc(change->if_index, true, block_id,
> > +                                         false);
> >                  if (error) {
> >                      VLOG_WARN("failed to bind LAG slave %s to master's block",
> >                                change->ifname);
> > @@ -766,8 +766,7 @@ netdev_linux_update_lag(struct rtnetlink_change *change)
> >          lag = shash_find_data(&lag_shash, change->ifname);
> >
> >          if (lag) {
> > -            tc_add_del_ingress_qdisc(change->if_index, false,
> > -                                     lag->block_id);
> > +            tc_add_del_qdisc(change->if_index, false, lag->block_id, false);
> >              shash_delete(&lag_shash, lag->node);
> >              free(lag);
> >          }
> > @@ -2430,7 +2429,8 @@ tc_del_matchall_policer(struct netdev *netdev)
> >          return err;
> >      }
> >
> > -    err = tc_del_filter(ifindex, TC_RESERVED_PRIORITY_POLICE, 1, block_id);
> > +    err = tc_del_filter(ifindex, TC_RESERVED_PRIORITY_POLICE, 1, block_id,
> > +                        false);
> >      if (err) {
> >          return err;
> >      }
> > @@ -2486,7 +2486,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
> >
> >      COVERAGE_INC(netdev_set_policing);
> >      /* Remove any existing ingress qdisc. */
> > -    error = tc_add_del_ingress_qdisc(ifindex, false, 0);
> > +    error = tc_add_del_qdisc(ifindex, false, 0, false);
> >      if (error) {
> >          VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
> >                       netdev_name, ovs_strerror(error));
> > @@ -2494,7 +2494,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
> >      }
> >
> >      if (kbits_rate) {
> > -        error = tc_add_del_ingress_qdisc(ifindex, true, 0);
> > +        error = tc_add_del_qdisc(ifindex, true, 0, false);
> >          if (error) {
> >              VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
> >                           netdev_name, ovs_strerror(error));
> > diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
> > index 78ad023..11c597a 100644
> > --- a/lib/netdev-tc-offloads.c
> > +++ b/lib/netdev-tc-offloads.c
> > @@ -189,7 +189,7 @@ del_filter_and_ufid_mapping(int ifindex, int prio, int handle,
> >  {
> >      int err;
> >
> > -    err = tc_del_filter(ifindex, prio, handle, block_id);
> > +    err = tc_del_filter(ifindex, prio, handle, block_id, false);
> >      del_ufid_tc_mapping(ufid);
> >
> >      return err;
> > @@ -357,7 +357,7 @@ netdev_tc_flow_flush(struct netdev *netdev)
> >
> >      block_id = get_block_id_from_netdev(netdev);
> >
> > -    return tc_flush(ifindex, block_id);
> > +    return tc_flush(ifindex, block_id, false);
> >  }
> >
> >  int
> > @@ -379,7 +379,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev,
> >      dump = xzalloc(sizeof *dump);
> >      dump->nl_dump = xzalloc(sizeof *dump->nl_dump);
> >      dump->netdev = netdev_ref(netdev);
> > -    tc_dump_flower_start(ifindex, dump->nl_dump, block_id);
> > +    tc_dump_flower_start(ifindex, dump->nl_dump, block_id, false);
> >
> >      *dump_out = dump;
> >
> > @@ -1356,7 +1356,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
> >      flower.act_cookie.data = ufid;
> >      flower.act_cookie.len = sizeof *ufid;
> >
> > -    err = tc_replace_flower(ifindex, prio, handle, &flower, block_id);
> > +    err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, false);
> >      if (!err) {
> >          add_ufid_tc_mapping(ufid, flower.prio, flower.handle, netdev, ifindex);
> >      }
> > @@ -1399,7 +1399,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED,
> >      block_id = get_block_id_from_netdev(dev);
> >      VLOG_DBG_RL(&rl, "flow get (dev %s prio %d handle %d block_id %d)",
> >                  netdev_get_name(dev), prio, handle, block_id);
> > -    err = tc_get_flower(ifindex, prio, handle, &flower, block_id);
> > +    err = tc_get_flower(ifindex, prio, handle, &flower, block_id, false);
> >      netdev_close(dev);
> >      if (err) {
> >          VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s",
> > @@ -1446,7 +1446,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
> >
> >      if (stats) {
> >          memset(stats, 0, sizeof *stats);
> > -        if (!tc_get_flower(ifindex, prio, handle, &flower, block_id)) {
> > +        if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, false)) {
> >              stats->n_packets = get_32aligned_u64(&flower.stats.n_packets);
> >              stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes);
> >              stats->used = flower.lastused;
> > @@ -1467,7 +1467,7 @@ probe_multi_mask_per_prio(int ifindex)
> >      int block_id = 0;
> >      int error;
> >
> > -    error = tc_add_del_ingress_qdisc(ifindex, true, block_id);
> > +    error = tc_add_del_qdisc(ifindex, true, block_id, false);
> >      if (error) {
> >          return;
> >      }
> > @@ -1479,7 +1479,7 @@ probe_multi_mask_per_prio(int ifindex)
> >      memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac);
> >      memset(&flower.mask.dst_mac, 0xff, sizeof flower.mask.dst_mac);
> >
> > -    error = tc_replace_flower(ifindex, 1, 1, &flower, block_id);
> > +    error = tc_replace_flower(ifindex, 1, 1, &flower, block_id, false);
> >      if (error) {
> >          goto out;
> >      }
> > @@ -1487,20 +1487,20 @@ probe_multi_mask_per_prio(int ifindex)
> >      memset(&flower.key.src_mac, 0x11, sizeof flower.key.src_mac);
> >      memset(&flower.mask.src_mac, 0xff, sizeof flower.mask.src_mac);
> >
> > -    error = tc_replace_flower(ifindex, 1, 2, &flower, block_id);
> > -    tc_del_filter(ifindex, 1, 1, block_id);
> > +    error = tc_replace_flower(ifindex, 1, 2, &flower, block_id, false);
> > +    tc_del_filter(ifindex, 1, 1, block_id, false);
> >
> >      if (error) {
> >          goto out;
> >      }
> >
> > -    tc_del_filter(ifindex, 1, 2, block_id);
> > +    tc_del_filter(ifindex, 1, 2, block_id, false);
> >
> >      multi_mask_per_prio = true;
> >      VLOG_INFO("probe tc: multiple masks on single tc prio is supported.");
> >
> >  out:
> > -    tc_add_del_ingress_qdisc(ifindex, false, block_id);
> > +    tc_add_del_qdisc(ifindex, false, block_id, false);
> >  }
> >
> >  static void
> > @@ -1509,12 +1509,12 @@ probe_tc_block_support(int ifindex)
> >      uint32_t block_id = 1;
> >      int error;
> >
> > -    error = tc_add_del_ingress_qdisc(ifindex, true, block_id);
> > +    error = tc_add_del_qdisc(ifindex, true, block_id, false);
> >      if (error) {
> >          return;
> >      }
> >
> > -    tc_add_del_ingress_qdisc(ifindex, false, block_id);
> > +    tc_add_del_qdisc(ifindex, false, block_id, false);
> >
> >      block_support = true;
> >      VLOG_INFO("probe tc: block offload is supported.");
> > @@ -1537,7 +1537,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
> >      }
> >
> >      /* make sure there is no ingress qdisc */
> > -    tc_add_del_ingress_qdisc(ifindex, false, 0);
> > +    tc_add_del_qdisc(ifindex, false, 0, false);
> >
> >      if (ovsthread_once_start(&block_once)) {
> >          probe_tc_block_support(ifindex);
> > @@ -1550,7 +1550,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
> >      }
> >
> >      block_id = get_block_id_from_netdev(netdev);
> > -    error = tc_add_del_ingress_qdisc(ifindex, true, block_id);
> > +    error = tc_add_del_qdisc(ifindex, true, block_id, false);
> >
> >      if (error && error != EEXIST) {
> >          VLOG_ERR("failed adding ingress qdisc required for offloading: %s",
> > diff --git a/lib/tc.c b/lib/tc.c
> > index 3548760..f77a532 100644
> > --- a/lib/tc.c
> > +++ b/lib/tc.c
> > @@ -200,14 +200,21 @@ tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
> >      return error;
> >  }
> >
> > -/* Adds or deletes a root ingress qdisc on device with specified ifindex.
> > +/* Adds or deletes a root qdisc on device with specified ifindex.
> >   *
> > - * This function is equivalent to running the following when 'add' is true:
> > + * The egress parameter determines if the qdisc is added on device ingress or
> > + * egress.
> > + *
> > + * If egress is false, this function is equivalent to running the following
> > + * when 'add' is true:
> >   *     /sbin/tc qdisc add dev <devname> handle ffff: ingress
> >   *
> >   * This function is equivalent to running the following when 'add' is false:
> >   *     /sbin/tc qdisc del dev <devname> handle ffff: ingress
> >   *
> > + * If egress is true, this function is equivalent to:
> > + * /sbin/tc qdisc (add|del) dev <devname> handle ffff: clsact
> > + *
> >   * Where dev <devname> is the device with specified ifindex name.
> >   *
> >   * The configuration and stats may be seen with the following command:
> > @@ -220,7 +227,7 @@ tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
> >   * Returns 0 if successful, otherwise a positive errno value.
> >   */
> >  int
> > -tc_add_del_ingress_qdisc(int ifindex, bool add, uint32_t block_id)
> > +tc_add_del_qdisc(int ifindex, bool add, uint32_t block_id, bool egress)
>
> It's now the second boolean value here. I think it will be a lot
> more readable (specially from callers) if we'll use an enum with
> ingress or egress options.
> I think also later the add/del boolean should be changed.
>

yeah, this makes sense.
Thanks

> >  {
> >      struct ofpbuf request;
> >      struct tcmsg *tcmsg;
> > @@ -229,11 +236,19 @@ tc_add_del_ingress_qdisc(int ifindex, bool add, uint32_t block_id)
> >      int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0;
> >
> >      tcmsg = tc_make_request(ifindex, type, flags, &request);
> > -    tcmsg->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
> > -    tcmsg->tcm_parent = TC_H_INGRESS;
> > -    nl_msg_put_string(&request, TCA_KIND, "ingress");
> > +
> > +    if (egress) {
> > +        tcmsg->tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
> > +        tcmsg->tcm_parent = TC_H_CLSACT;
> > +        nl_msg_put_string(&request, TCA_KIND, "clsact");
> > +    } else {
> > +        tcmsg->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
> > +        tcmsg->tcm_parent = TC_H_INGRESS;
> > +        nl_msg_put_string(&request, TCA_KIND, "ingress");
> > +    }
> > +
> >      nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
> > -    if (block_id) {
> > +    if (!egress && block_id) {
> >          nl_msg_put_u32(&request, TCA_INGRESS_BLOCK, block_id);
> >      }
> >
> > @@ -1454,7 +1469,8 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tc_flower *flower)
> >  }
> >
> >  int
> > -tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id)
> > +tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id,
> > +                     bool egress)
> >  {
> >      struct ofpbuf request;
> >      struct tcmsg *tcmsg;
> > @@ -1462,7 +1478,8 @@ tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id)
> >
> >      index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
> >      tcmsg = tc_make_request(index, RTM_GETTFILTER, NLM_F_DUMP, &request);
> > -    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
> > +    tcmsg->tcm_parent =
> > +        egress ? TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
> >      tcmsg->tcm_info = TC_H_UNSPEC;
> >      tcmsg->tcm_handle = 0;
> >
> > @@ -1473,7 +1490,7 @@ tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id)
> >  }
> >
> >  int
> > -tc_flush(int ifindex, uint32_t block_id)
> > +tc_flush(int ifindex, uint32_t block_id, bool egress)
> >  {
> >      struct ofpbuf request;
> >      struct tcmsg *tcmsg;
> > @@ -1481,14 +1498,16 @@ tc_flush(int ifindex, uint32_t block_id)
> >
> >      index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
> >      tcmsg = tc_make_request(index, RTM_DELTFILTER, NLM_F_ACK, &request);
> > -    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
> > +    tcmsg->tcm_parent =
> > +        egress ? TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
> >      tcmsg->tcm_info = TC_H_UNSPEC;
> >
> >      return tc_transact(&request, NULL);
> >  }
> >
> >  int
> > -tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id)
> > +tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id,
> > +              bool egress)
> >  {
> >      struct ofpbuf request;
> >      struct tcmsg *tcmsg;
> > @@ -1498,7 +1517,8 @@ tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id)
> >
> >      index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
> >      tcmsg = tc_make_request(index, RTM_DELTFILTER, NLM_F_ECHO, &request);
> > -    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
> > +    tcmsg->tcm_parent =
> > +        egress ? TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
> >      tcmsg->tcm_info = tc_make_handle(prio, 0);
> >      tcmsg->tcm_handle = handle;
> >
> > @@ -1511,7 +1531,7 @@ tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id)
> >
> >  int
> >  tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower,
> > -              uint32_t block_id)
> > +              uint32_t block_id, bool egress)
> >  {
> >      struct ofpbuf request;
> >      struct tcmsg *tcmsg;
> > @@ -1521,7 +1541,8 @@ tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower,
> >
> >      index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
> >      tcmsg = tc_make_request(index, RTM_GETTFILTER, NLM_F_ECHO, &request);
> > -    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
> > +    tcmsg->tcm_parent =
> > +        egress ? TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
> >      tcmsg->tcm_info = tc_make_handle(prio, 0);
> >      tcmsg->tcm_handle = handle;
> >
> > @@ -2280,7 +2301,7 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
> >
> >  int
> >  tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
> > -                  struct tc_flower *flower, uint32_t block_id)
> > +                  struct tc_flower *flower, uint32_t block_id, bool egress)
> >  {
> >      struct ofpbuf request;
> >      struct tcmsg *tcmsg;
> > @@ -2293,7 +2314,8 @@ tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
> >      index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
> >      tcmsg = tc_make_request(index, RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_ECHO,
> >                              &request);
> > -    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
> > +    tcmsg->tcm_parent =
> > +        egress ? TC_EGRESS_PARENT : (block_id ? : TC_INGRESS_PARENT);
> >      tcmsg->tcm_info = tc_make_handle(prio, eth_type);
> >      tcmsg->tcm_handle = handle;
> >
> > diff --git a/lib/tc.h b/lib/tc.h
> > index 154e120..b9e75b2 100644
> > --- a/lib/tc.h
> > +++ b/lib/tc.h
> > @@ -36,8 +36,12 @@
> >  #ifndef TC_H_MIN_INGRESS
> >  #define TC_H_MIN_INGRESS       0xFFF2U
> >  #endif
> > +#ifndef TC_H_MIN_EGRESS
> > +#define TC_H_MIN_EGRESS       0xFFF3U
> > +#endif
> >
> >  #define TC_INGRESS_PARENT TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS)
> > +#define TC_EGRESS_PARENT TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS)
> >
> >  #define TC_POLICY_DEFAULT "none"
> >
> > @@ -72,7 +76,7 @@ tc_get_minor(unsigned int handle)
> >  struct tcmsg *tc_make_request(int ifindex, int type,
> >                                unsigned int flags, struct ofpbuf *);
> >  int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
> > -int tc_add_del_ingress_qdisc(int ifindex, bool add, uint32_t block_id);
> > +int tc_add_del_qdisc(int ifindex, bool add, uint32_t block_id, bool egress);
> >
> >  struct tc_cookie {
> >      const void *data;
> > @@ -225,12 +229,15 @@ BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite)
> >                    + sizeof(uint32_t) - 2 < sizeof(struct tc_flower));
> >
> >  int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
> > -                      struct tc_flower *flower, uint32_t block_id);
> > -int tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id);
> > +                      struct tc_flower *flower, uint32_t block_id,
> > +                      bool egress);
> > +int tc_del_filter(int ifindex, int prio, int handle, uint32_t block_id,
> > +                  bool egress);
> >  int tc_get_flower(int ifindex, int prio, int handle,
> > -                  struct tc_flower *flower, uint32_t block_id);
> > -int tc_flush(int ifindex, uint32_t block_id);
> > -int tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id);
> > +                  struct tc_flower *flower, uint32_t block_id, bool egress);
> > +int tc_flush(int ifindex, uint32_t block_id, bool egress);
> > +int tc_dump_flower_start(int ifindex, struct nl_dump *dump, uint32_t block_id,
> > +                         bool egress);
> >  int parse_netlink_to_tc_flower(struct ofpbuf *reply,
> >                                 struct tc_flower *flower);
> >  void tc_set_policy(const char *policy);
> >


More information about the dev mailing list