[ovs-dev] [PATCH v2 1/2] add port-based ingress policing based packet-per-second rate-limiting

Marcelo Ricardo Leitner mleitner at redhat.com
Thu May 20 15:48:53 UTC 2021


Hi,

On Mon, May 17, 2021 at 01:18:53PM +0200, Simon Horman wrote:
...
> @@ -547,6 +549,12 @@ is_tap_netdev(const struct netdev *netdev)
>      return netdev_get_class(netdev) == &netdev_tap_class;
>  }
>  

> +enum {
> +    OVS_TC_QOS_TYPE_BPS,
> +    OVS_TC_QOS_TYPE_PPS,
> +    OVS_TC_QOS_TYPE_MAX,
> +};
> +

>  static int
>  netdev_linux_netnsid_update__(struct netdev_linux *netdev)
>  {
> @@ -2593,24 +2601,50 @@ tc_matchall_fill_police(uint32_t kbits_rate, uint32_t kbits_burst)
>  }
>
>  static void
> -nl_msg_put_act_police(struct ofpbuf *request, struct tc_police police)
> +nl_msg_put_act_police(struct ofpbuf *request, struct tc_police police,
> +                      uint32_t kpkts_rate, uint32_t kpkts_burst)
>  {
> -    size_t offset;
> +    size_t offset, act_offset;
> +    uint32_t i = 0, prio = 0;
>
> -    nl_msg_put_string(request, TCA_ACT_KIND, "police");
> -    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
> -    nl_msg_put_unspec(request, TCA_POLICE_TBF, &police, sizeof police);
> -    tc_put_rtab(request, TCA_POLICE_RATE, &police.rate);
> -    nl_msg_put_u32(request, TCA_POLICE_RESULT, TC_ACT_UNSPEC);
> -    nl_msg_end_nested(request, offset);
> +    for (i = 0; i < OVS_TC_QOS_TYPE_MAX; i++) {
> +        if (i == OVS_TC_QOS_TYPE_BPS && !police.rate.rate) {
> +            continue;
> +        }
> +        if (i == OVS_TC_QOS_TYPE_PPS && !kpkts_rate) {
> +            continue;
> +        }
> +        act_offset = nl_msg_start_nested(request, ++prio);
> +        nl_msg_put_string(request, TCA_ACT_KIND, "police");
> +        offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
> +        if (i == OVS_TC_QOS_TYPE_BPS && police.rate.rate) {

Nit, the "&& ..." condition is not needed, due to the if()s above.

> +            tc_put_rtab(request, TCA_POLICE_RATE, &police.rate);
> +        } else if (i == OVS_TC_QOS_TYPE_PPS && kpkts_rate) {
> +            unsigned int pkt_burst_ticks, pps_rate;
> +            /* for PPS, set rate as 0 to act as a single action */
> +            police.rate.rate = 0;
> +            police.burst = 0;
> +            police.rate.cell_log = 0;

This is changing the parameter and implicitly relies on the fact that
OVS_TC_QOS_TYPE_PPS > OVS_TC_QOS_TYPE_BPS. It works, but may cause
problems in the future.

Maybe add a new variable that will be actually used by
nl_msg_put_unspec() below, and initialize it for each case?


> +            pps_rate = kpkts_rate * 1000;
> +            pkt_burst_ticks = tc_bytes_to_ticks(pps_rate,
> +                                                MIN(UINT32_MAX / 1024, kpkts_burst) * 1024);

I don't get why for pps_rate 'k' is 1000, and for bursts, it's 1024.
Anyhow, this is also there for bps and it makes sense to follow that.
But some mention to it in the docs below is welcomed.

> +            nl_msg_put_u64(request, TCA_POLICE_PKTRATE64, (uint64_t)pps_rate);
> +            nl_msg_put_u64(request, TCA_POLICE_PKTBURST64, (uint64_t)pkt_burst_ticks);
> +        }
> +        nl_msg_put_unspec(request, TCA_POLICE_TBF, &police, sizeof police);
> +        nl_msg_put_u32(request, TCA_POLICE_RESULT, TC_ACT_PIPE);
> +        nl_msg_end_nested(request, offset);
> +        nl_msg_end_nested(request, act_offset);
> +    }
>  }
>
>  static int
>  tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
> -                        uint32_t kbits_burst)
> +                        uint32_t kbits_burst, uint32_t kpkts_rate,
> +                        uint32_t kpkts_burst)
>  {
>      uint16_t eth_type = (OVS_FORCE uint16_t) htons(ETH_P_ALL);
> -    size_t basic_offset, action_offset, inner_offset;
> +    size_t basic_offset, action_offset;
>      uint16_t prio = TC_RESERVED_PRIORITY_POLICE;
>      int ifindex, err = 0;
>      struct tc_police pol_act;
> @@ -2634,9 +2668,7 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
>      nl_msg_put_string(&request, TCA_KIND, "matchall");
>      basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
>      action_offset = nl_msg_start_nested(&request, TCA_MATCHALL_ACT);
> -    inner_offset = nl_msg_start_nested(&request, 1);
> -    nl_msg_put_act_police(&request, pol_act);
> -    nl_msg_end_nested(&request, inner_offset);
> +    nl_msg_put_act_police(&request, pol_act, kpkts_rate, kpkts_burst);
>      nl_msg_end_nested(&request, action_offset);
>      nl_msg_end_nested(&request, basic_offset);
>
> @@ -2676,8 +2708,9 @@ tc_del_matchall_policer(struct netdev *netdev)
>  /* Attempts to set input rate limiting (policing) policy.  Returns 0 if
>   * successful, otherwise a positive errno value. */
>  static int
> -netdev_linux_set_policing(struct netdev *netdev_,
> -                          uint32_t kbits_rate, uint32_t kbits_burst)
> +netdev_linux_set_policing(struct netdev *netdev_, uint32_t kbits_rate,
> +                          uint32_t kbits_burst, uint32_t kpkts_rate,
> +                          uint32_t kpkts_burst)
>  {
>      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      const char *netdev_name = netdev_get_name(netdev_);
> @@ -2688,6 +2721,10 @@ netdev_linux_set_policing(struct netdev *netdev_,
>                     : !kbits_burst ? 8000 /* Default to 8000 kbits if 0. */
>                     : kbits_burst);       /* Stick with user-specified value. */
>
> +    kpkts_burst = (!kpkts_rate ? 0       /* Force to 0 if no rate specified. */
> +                   : !kpkts_burst ? 16   /* Default to 16000 packets if 0. */

It should be 16 -> 16*1024, or just '16 kpps'.

> +                   : kpkts_burst);       /* Stick with user-specified value. */
> +
>      ovs_mutex_lock(&netdev->mutex);
>      if (netdev_linux_netnsid_is_remote(netdev)) {
>          error = EOPNOTSUPP;
> @@ -2697,7 +2734,9 @@ netdev_linux_set_policing(struct netdev *netdev_,
>      if (netdev->cache_valid & VALID_POLICING) {
>          error = netdev->netdev_policing_error;
>          if (error || (netdev->kbits_rate == kbits_rate &&
> -                      netdev->kbits_burst == kbits_burst)) {
> +                      netdev->kpkts_rate == kpkts_rate &&
> +                      netdev->kbits_burst == kbits_burst &&
> +                      netdev->kpkts_burst == kpkts_burst)) {
>              /* Assume that settings haven't changed since we last set them. */
>              goto out;
>          }
> @@ -2709,8 +2748,9 @@ netdev_linux_set_policing(struct netdev *netdev_,
>      /* Use matchall for policing when offloadling ovs with tc-flower. */
>      if (netdev_is_flow_api_enabled()) {
>          error = tc_del_matchall_policer(netdev_);
> -        if (kbits_rate) {
> -            error = tc_add_matchall_policer(netdev_, kbits_rate, kbits_burst);
> +        if (kbits_rate || kpkts_rate) {
> +            error = tc_add_matchall_policer(netdev_, kbits_rate, kbits_burst,
> +                                            kpkts_rate, kpkts_burst);
>          }
>          ovs_mutex_unlock(&netdev->mutex);
>          return error;
> @@ -2729,7 +2769,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
>          goto out;
>      }
>
> -    if (kbits_rate) {
> +    if (kbits_rate || kpkts_rate) {
>          error = tc_add_del_qdisc(ifindex, true, 0, TC_INGRESS);
>          if (error) {
>              VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
> @@ -2737,7 +2777,8 @@ netdev_linux_set_policing(struct netdev *netdev_,
>              goto out;
>          }
>
> -        error = tc_add_policer(netdev_, kbits_rate, kbits_burst);
> +        error = tc_add_policer(netdev_, kbits_rate, kbits_burst,
> +                               kpkts_rate, kpkts_burst);
>          if (error){
>              VLOG_WARN_RL(&rl, "%s: adding policing action failed: %s",
>                      netdev_name, ovs_strerror(error));
> @@ -2747,6 +2788,8 @@ netdev_linux_set_policing(struct netdev *netdev_,
>
>      netdev->kbits_rate = kbits_rate;
>      netdev->kbits_burst = kbits_burst;
> +    netdev->kpkts_rate = kpkts_rate;
> +    netdev->kpkts_burst = kpkts_burst;
>
>  out:
>      if (!error || error == ENODEV) {
> @@ -5523,7 +5566,7 @@ netdev_linux_tc_make_request(const struct netdev *netdev, int type,
>  }
>
>  /* Adds a policer to 'netdev' with a rate of 'kbits_rate' and a burst size
> - * of 'kbits_burst'.
> + * of 'kbits_burst', with a rate of 'kpkts_rate' and a burst size of 'kpkts_burst'.
>   *
>   * This function is equivalent to running:
>   *     /sbin/tc filter add dev <devname> parent ffff: protocol all prio 49
> @@ -5536,14 +5579,13 @@ netdev_linux_tc_make_request(const struct netdev *netdev, int type,
>   * Returns 0 if successful, otherwise a positive errno value.
>   */
>  static int
> -tc_add_policer(struct netdev *netdev,
> -               uint32_t kbits_rate, uint32_t kbits_burst)
> +tc_add_policer(struct netdev *netdev, uint32_t kbits_rate,
> +               uint32_t kbits_burst, uint32_t kpkts_rate, uint32_t kpkts_burst)
>  {
> +    size_t basic_offset, police_offset;
>      struct tc_police tc_police;
>      struct ofpbuf request;
>      struct tcmsg *tcmsg;
> -    size_t basic_offset;
> -    size_t police_offset;
>      int error;
>      int mtu = 65535;
>
> @@ -5561,7 +5603,6 @@ tc_add_policer(struct netdev *netdev,
>       * tc's point of view.  Whatever. */
>      tc_police.burst = tc_bytes_to_ticks(
>          tc_police.rate.rate, MIN(UINT32_MAX / 1024, kbits_burst) * 1024 / 8);
> -
>      tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTFILTER,
>                                           NLM_F_EXCL | NLM_F_CREATE, &request);
>      if (!tcmsg) {
> @@ -5570,12 +5611,10 @@ tc_add_policer(struct netdev *netdev,
>      tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
>      tcmsg->tcm_info = tc_make_handle(49,
>                                       (OVS_FORCE uint16_t) htons(ETH_P_ALL));
> -
>      nl_msg_put_string(&request, TCA_KIND, "basic");
>      basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
> -    police_offset = nl_msg_start_nested(&request, TCA_BASIC_POLICE);
> -    nl_msg_put_unspec(&request, TCA_POLICE_TBF, &tc_police, sizeof tc_police);
> -    tc_put_rtab(&request, TCA_POLICE_RATE, &tc_police.rate);
> +    police_offset = nl_msg_start_nested(&request, TCA_BASIC_ACT);
> +    nl_msg_put_act_police(&request, tc_police, kpkts_rate, kpkts_burst);
>      nl_msg_end_nested(&request, police_offset);
>      nl_msg_end_nested(&request, basic_offset);
>
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index 73dce2fca..b5420947d 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -514,13 +514,16 @@ struct netdev_class {
>       * NETDEV_PT_LEGACY_L2. */
>      enum netdev_pt_mode (*get_pt_mode)(const struct netdev *netdev);
>
> -    /* Attempts to set input rate limiting (policing) policy, such that up to
> -     * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative
> -     * burst size of 'kbits' kb.
> +    /* Attempts to set input rate limiting (policing) policy, such that:
> +     * - up to 'kbits_rate' kbps of traffic is accepted, with a maximum
> +     *   accumulative burst size of 'kbits' kb; and
> +     * - up to 'kpkts' kpps of traffic is accepted, with a maximum
> +     *   accumulative burst size of 'kpkts' kilo packets.
>       *
>       * This function may be set to null if policing is not supported. */
>      int (*set_policing)(struct netdev *netdev, unsigned int kbits_rate,
> -                        unsigned int kbits_burst);
> +                        unsigned int kbits_burst, unsigned int kpkts_rate,
> +                        unsigned int kpkts_burst);
>
>      /* Adds to 'types' all of the forms of QoS supported by 'netdev', or leaves
>       * it empty if 'netdev' does not support QoS.  Any names added to 'types'
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 91e91955c..8305f6c42 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -1619,17 +1619,20 @@ netdev_get_custom_stats(const struct netdev *netdev,
>      return error;
>  }
>
> -
> -/* Attempts to set input rate limiting (policing) policy, such that up to
> - * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative burst
> - * size of 'kbits' kb. */
> +/* Attempts to set input rate limiting (policing) policy, such that:
> + * - up to 'kbits_rate' kbps of traffic is accepted, with a maximum
> + *   accumulative burst size of 'kbits' kb; and
> + * - up to 'kpkts' kpps of traffic is accepted, with a maximum
> + *   accumulative burst size of 'kpkts' kilo packets.
> + */
>  int
>  netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate,
> -                    uint32_t kbits_burst)
> +                    uint32_t kbits_burst, uint32_t kpkts_rate,
> +                    uint32_t kpkts_burst)
>  {
>      return (netdev->netdev_class->set_policing
>              ? netdev->netdev_class->set_policing(netdev,
> -                    kbits_rate, kbits_burst)
> +                    kbits_rate, kbits_burst, kpkts_rate, kpkts_burst)
>              : EOPNOTSUPP);
>  }
>
> diff --git a/lib/netdev.h b/lib/netdev.h
> index b705a9e56..acf174927 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -289,7 +289,8 @@ struct netdev_queue_stats {
>  };
>
>  int netdev_set_policing(struct netdev *, uint32_t kbits_rate,
> -                        uint32_t kbits_burst);
> +                        uint32_t kbits_burst, uint32_t kpkts_rate,
> +                        uint32_t kpkts_burst);
>
>  int netdev_get_qos_types(const struct netdev *, struct sset *types);
>  int netdev_get_qos_capabilities(const struct netdev *,
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 5ed7e8234..d1db675ff 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -4896,7 +4896,9 @@ iface_configure_qos(struct iface *iface, const struct ovsrec_qos *qos)
>
>      netdev_set_policing(iface->netdev,
>                          MIN(UINT32_MAX, iface->cfg->ingress_policing_rate),
> -                        MIN(UINT32_MAX, iface->cfg->ingress_policing_burst));
> +                        MIN(UINT32_MAX, iface->cfg->ingress_policing_burst),
> +                        MIN(UINT32_MAX, iface->cfg->ingress_policing_kpkts_rate),
> +                        MIN(UINT32_MAX, iface->cfg->ingress_policing_kpkts_burst));
>
>      ofpbuf_uninit(&queues_buf);
>  }
> diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
> index 0666c8c76..4873cfde7 100644
> --- a/vswitchd/vswitch.ovsschema
> +++ b/vswitchd/vswitch.ovsschema
> @@ -1,6 +1,6 @@
>  {"name": "Open_vSwitch",
> - "version": "8.2.0",
> - "cksum": "1076640191 26427",
> + "version": "8.3.0",
> + "cksum": "3781850481 26690",
>   "tables": {
>     "Open_vSwitch": {
>       "columns": {
> @@ -242,6 +242,12 @@
>         "ingress_policing_burst": {
>           "type": {"key": {"type": "integer",
>                            "minInteger": 0}}},
> +       "ingress_policing_kpkts_rate": {
> +         "type": {"key": {"type": "integer",
> +                          "minInteger": 0}}},
> +       "ingress_policing_kpkts_burst": {
> +         "type": {"key": {"type": "integer",
> +                          "minInteger": 0}}},
>         "mac_in_use": {
>           "type": {"key": {"type": "string"},
>                    "min": 0, "max": 1},
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 4597a215d..29f587b82 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -3654,8 +3654,18 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
>          table="Queue"/> tables).
>        </p>
>        <p>
> -        Policing is currently implemented on Linux and OVS with DPDK. Both
> -        implementations use a simple ``token bucket'' approach:
> +        Policing settings can be set with byte rate or packet rate, and they
> +        can be configured together, in which case they take effect together,
> +        that means the smaller speed limit of them is in effect.
> +      </p>
> +      <p>
> +        Currently, byte rate policing is implemented on Linux and OVS with
> +        DPDK, while packet rate policing is only implemented on Linux.  Both
> +        Linux and OVS DPDK implementations use a simple ``token bucket’’
> +        approach:
> +      </p>
> +      <p>
> +        Byte rate policing:
>        </p>
>        <ul>
>          <li>
> @@ -3673,6 +3683,26 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
>            rate specified by <ref column="ingress_policing_rate"/>.
>          </li>
>        </ul>
> +      <p>
> +        Packet rate policing:
> +      </p>
> +      <ul>
> +        <li>
> +          The size of the bucket corresponds to <ref
> +          column="ingress_policing_kpkts_burst"/>.  Initially the bucket is
> +          full.
> +        </li>
> +        <li>
> +          Whenever a packet is received, it will consume one token from the
> +          current bucket.  If the token is available in the bucket, it's
> +          removed and the packet is forwarded.  Otherwise, the packet is
> +          dropped.
> +        </li>
> +        <li>
> +          Whenever it is not full, the bucket is refilled with tokens at the
> +          rate specified by <ref column="ingress_policing_kpkts_rate"/>.
> +        </li>
> +      </ul>
>        <p>
>          Policing interacts badly with some network protocols, and especially
>          with fragmented IP packets.  Suppose that there is enough network
> @@ -3698,6 +3728,14 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
>          </p>
>        </column>
>
> +      <column name="ingress_policing_kpkts_rate">
> +        <p>
> +          Maximum rate for data received on this interface, in kpps.  Data
> +          received faster than this rate is dropped.  Set to <code>0</code>
> +          (the default) to disable policing.
> +        </p>
> +      </column>
> +
>        <column name="ingress_policing_burst">
>          <p>Maximum burst size for data received on this interface, in kb.  The
>          default burst size if set to <code>0</code> is 8000 kbit.  This value
> @@ -3712,6 +3750,24 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
>            closer to achieving the full rate.
>          </p>
>        </column>
> +
> +      <column name="ingress_policing_kpkts_burst">
> +        <p>
> +          Maximum burst size for data received on this interface, in
> +          kilo-packets.  The default burst size if set to <code>0</code> is
> +          16 kilo-packets. This value has no effect if <ref
> +          column="ingress_policing_kpkts_rate"/> is <code>0</code>.</p>
> +        <p>
> +          Specifying a larger burst size lets the algorithm be more
> +          forgiving, which is important for protocols like TCP that react
> +          severely to dropped packets.  The burst size should be at least
> +          the size of the interface's MTU.  Specifying a value that is
                             ^^^^^^^^^^^^^^^
> +          numerically at least as large as 80% of <ref
> +          column="ingress_policing_kpkts_rate"/> helps TCP come closer to
> +          achieving the full rate.

This paragraph is present for ingress_policing_burst, and that's all
in bytes. Seems it was partially updated to handle packets.

I fail to understand how MTU size, even if just numerically, is
related to a count of packets. 9000 MTU allows more data through with
less overhead but if with the same bandwidth, it would even have a
smaller pps than a 1500 MTU.

The guidance here needs updating.

Regards,
Marcelo



More information about the dev mailing list