[ovs-dev] [PATCH v1 2/2] netdev-dpdk.c: Add ingress-policing functionality.
Miguel Angel Ajo Pelayo
majopela at redhat.com
Fri Apr 15 08:31:48 UTC 2016
On Wed, Apr 13, 2016 at 4:48 PM, Ian Stokes <ian.stokes at intel.com> wrote:
> This patch provides the modifications required in netdev-dpdk.c and
> vswitch.xml to enable ingress policing for DPDK interfaces.
>
> This patch implements the necessary netdev functions to netdev-dpdk.c as
> well as various helper functions required for ingress policing.
>
> The vswitch.xml has been modified to explain the expected parameters and
> behaviour when using ingress policing.
>
> The INSTALL.DPDK.md guide has been modified to provide an example
> configuration of ingress policing.
>
> Signed-off-by: Ian Stokes <ian.stokes at intel.com>
> ---
>
> v1:
>
> *INSTALL.DPDK.md
> - Add ingress_policing usage example.
>
> *NEWS
> - Add support for ingress_policing to DPDK section.
>
> *netdev-dpdk.c
> - Add ingress_policer struct.
> - Modify struct netdev-dpdk to include a ovsrcu pointer to an ingress
> policer.
> - Modify netdev_dpdk_init() to initialise ingress policer to null.
> - Add function 'ingress_policer_run()'to process packets with a given
> netdev, rte_mbuf and packet count.
> - Modified 'netdev_dpdk_vhost_update_rx_counters()' to accept new
> parameter int dropped representing number of packets dropped.
> - Modified 'netdev_dpdk_vhost_update_rx_counters()' to update number of
> dropped packets in stats->rx_dropped.
> - Modified 'netdev_dpdk_vhost_rxq_recv()' to check and call
> ingress_policing functionality with 'ingress_policer_run()'.
> - Modified 'netdev_dpdk_rxq_recv()' to check and call
> ingress_policing functionality with 'ingress_policer_run()'.
> - Modified 'netdev_dpdk_rxq_recv()' to update rx dropped stats.
> - Modified 'netdev_dpdk_vhost_get_stats()' to add support for rx_dropped.
> - Add function 'netdev_dpdk_policer_construct__()' to create an
> ingress_policer.
> - Add function 'netdev_dpdk_policer_destruct__()' to destroy an ingress
> policer.
> - Add function 'netdev_dpdk_set_policing()' responsible for calling
> helper functions to create and destroy an ingress policer for the
> device.
> - Add function 'netdev_dpdk_get_ingress_policer()' to return a pointer
> to the ingress_policer using the ovsrcu_get function.
>
> *vswitch.xml
> - Modify ingress policing section to include support in OVS with DPDK.
> ---
> INSTALL.DPDK.md | 19 ++++++
> NEWS | 1 +
> lib/netdev-dpdk.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++++--
> vswitchd/vswitch.xml | 4 +-
> 4 files changed, 190 insertions(+), 7 deletions(-)
>
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index 9ec8bf6..df2c6cd 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -227,6 +227,25 @@ Using the DPDK with ovs-vswitchd:
> For more details regarding egress-policer parameters please refer to the
> vswitch.xml.
>
> +9. Ingress Policing Example
> +
> + Assuming you have a vhost-user port receiving traffic consisting of
> + packets of size 64 bytes, the following command would limit the reception
> + rate of the port to ~1,000,000 packets per second:
> +
> + `ovs-vsctl set interface vhost-user0 ingress_policing_rate=368000
> + ingress_policing_burst=1000`
> +
> + To examine the ingress policer configuration of the port:
> +
> + `ovs-vsctl list interface vhost-user0`
> +
> + To clear the ingress policer configuration from the port use the following:
> +
> + `ovs-vsctl set interface vhost-user0 ingress_policing_rate=0`
> +
> + For more details regarding ingress-policer see the vswitch.xml.
> +
> Performance Tuning:
> -------------------
>
> diff --git a/NEWS b/NEWS
> index ea7f3a1..d0283fa 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -26,6 +26,7 @@ Post-v2.5.0
> assignment.
> * Type of log messages from PMD threads changed from INFO to DBG.
> * QoS functionality with sample egress-policer implementation.
> + * Add ingress policing functionality.
> - ovs-benchmark: This utility has been removed due to lack of use and
> bitrot.
> - ovs-appctl:
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index fe9c9cb..2d955d3 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -297,6 +297,12 @@ struct dpdk_ring {
> struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
> };
>
> +struct ingress_policer {
> + struct rte_meter_srtcm_params app_srtcm_params;
> + struct rte_meter_srtcm in_policer;
> + rte_spinlock_t policer_lock;
> +};
> +
> struct netdev_dpdk {
> struct netdev up;
> int port_id;
> @@ -342,6 +348,10 @@ struct netdev_dpdk {
> struct qos_conf *qos_conf;
> rte_spinlock_t qos_lock;
>
> + /* Ingress Policer */
> + OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
> + uint32_t policer_rate;
> + uint32_t policer_burst;
> };
>
> struct netdev_rxq_dpdk {
> @@ -355,6 +365,9 @@ static int netdev_dpdk_construct(struct netdev *);
>
> struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
>
> +struct ingress_policer *
> +netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
> +
> static bool
> is_dpdk_class(const struct netdev_class *class)
> {
> @@ -721,6 +734,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
> dev->qos_conf = NULL;
> rte_spinlock_init(&dev->qos_lock);
>
> + /* Initialise rcu pointer for ingress policer to NULL */
> + ovsrcu_init(&dev->ingress_policer, NULL);
> + dev->policer_rate = 0;
> + dev->policer_burst = 0;
> +
> netdev->n_txq = NR_QUEUE;
> netdev->n_rxq = NR_QUEUE;
> netdev->requested_n_rxq = NR_QUEUE;
> @@ -1152,6 +1170,22 @@ netdev_dpdk_policer_run__(struct rte_meter_srtcm *meter,
> return cnt;
> }
>
> +static int
> +ingress_policer_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> + int pkt_cnt)
> +{
> + int cnt = 0;
> + struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
> +
> + if (policer) {
> + rte_spinlock_lock(&policer->policer_lock);
> + cnt = netdev_dpdk_policer_run__(&policer->in_policer, pkts, pkt_cnt);
> + rte_spinlock_unlock(&policer->policer_lock);
> + }
> +
> + return cnt;
> +}
> +
> static bool
> is_vhost_running(struct virtio_net *virtio_dev)
> {
> @@ -1160,12 +1194,14 @@ is_vhost_running(struct virtio_net *virtio_dev)
>
> static inline void
> netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
> - struct dp_packet **packets, int count)
> + struct dp_packet **packets, int count,
> + int dropped)
> {
> int i;
> struct dp_packet *packet;
>
> stats->rx_packets += count;
> + stats->rx_dropped += dropped;
> for (i = 0; i < count; i++) {
> packet = packets[i];
>
> @@ -1197,7 +1233,9 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> int qid = rxq->queue_id;
> + struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
> uint16_t nb_rx = 0;
> + uint16_t dropped = 0;
>
> if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
> return EAGAIN;
> @@ -1215,8 +1253,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> return EAGAIN;
> }
>
> + if (policer != NULL) {
> + dropped = nb_rx;
> + nb_rx = ingress_policer_run(dev, (struct rte_mbuf **)packets, nb_rx);
> + dropped -= nb_rx;
> + }
> +
> rte_spinlock_lock(&dev->stats_lock);
> - netdev_dpdk_vhost_update_rx_counters(&dev->stats, packets, nb_rx);
> + netdev_dpdk_vhost_update_rx_counters(&dev->stats, packets, nb_rx, dropped);
> rte_spinlock_unlock(&dev->stats_lock);
>
> *c = (int) nb_rx;
> @@ -1229,7 +1273,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
> {
> struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
> struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> + struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
> int nb_rx;
> + int dropped = 0;
>
> /* There is only one tx queue for this core. Do not flush other
> * queues.
> @@ -1247,6 +1293,19 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
> return EAGAIN;
> }
>
> + if (policer != NULL) {
> + dropped = nb_rx;
> + nb_rx = ingress_policer_run(dev, (struct rte_mbuf **) packets, nb_rx);
> + dropped -= nb_rx;
> + }
> +
> + /* Update stats to reflect dropped packets */
> + if (OVS_UNLIKELY(dropped)) {
> + rte_spinlock_lock(&dev->stats_lock);
> + dev->stats.rx_dropped += dropped;
> + rte_spinlock_unlock(&dev->stats_lock);
> + }
> +
> *c = nb_rx;
>
> return 0;
> @@ -1689,13 +1748,13 @@ netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
> stats->tx_fifo_errors = UINT64_MAX;
> stats->tx_heartbeat_errors = UINT64_MAX;
> stats->tx_window_errors = UINT64_MAX;
> - stats->rx_dropped += UINT64_MAX;
>
> rte_spinlock_lock(&dev->stats_lock);
> /* Supported Stats */
> stats->rx_packets += dev->stats.rx_packets;
> stats->tx_packets += dev->stats.tx_packets;
> stats->tx_dropped += dev->stats.tx_dropped;
> + stats->rx_dropped = dev->stats.rx_dropped;
> stats->multicast = dev->stats.multicast;
> stats->rx_bytes = dev->stats.rx_bytes;
> stats->tx_bytes = dev->stats.tx_bytes;
> @@ -1736,7 +1795,7 @@ netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
>
> /* These are the available DPDK counters for packets not received due to
> * local resource constraints in DPDK and NIC respectively. */
> - stats->rx_dropped = rte_stats.rx_nombuf + rte_stats.imissed;
> + stats->rx_dropped += rte_stats.rx_nombuf + rte_stats.imissed;
> stats->collisions = UINT64_MAX;
>
> stats->rx_length_errors = UINT64_MAX;
> @@ -1804,6 +1863,104 @@ netdev_dpdk_get_features(const struct netdev *netdev,
> }
>
> static int
> +netdev_dpdk_policer_construct__(struct netdev *netdev_, uint32_t rate,
> + uint32_t burst)
> +{
> + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
> + struct ingress_policer *policer;
> + uint64_t rate_bytes;
> + uint64_t burst_bytes;
> + int err = 0;
> +
> + policer = xmalloc(sizeof *policer);
> + rte_spinlock_init(&policer->policer_lock);
> +
> + /* rte_meter requires bytes so convert kbits rate and burst to bytes. */
> + rate_bytes = rate * 125;
> + burst_bytes = burst * 125;
Nit suggestion: I'd use * 1000/8 instead to make it clearer, the
compiler optimizes that back to 125.
> +
> + policer->app_srtcm_params.cir = rate_bytes;
> + policer->app_srtcm_params.cbs = burst_bytes;
> + policer->app_srtcm_params.ebs = 0;
> + err = rte_meter_srtcm_config(&policer->in_policer,
> + &policer->app_srtcm_params);
> + ovsrcu_set(&netdev->ingress_policer, policer);
> +
> + return err;
> +}
> +
> +static int netdev_dpdk_policer_destruct__(struct ingress_policer * policer)
> +{
> + if (policer == NULL) {
> + return 0;
> + }
> + else {
> + free(policer);
> + }
> + return 0;
> +}
> +
> +static int
> +netdev_dpdk_set_policing(struct netdev* netdev_, uint32_t policer_rate,
> + uint32_t policer_burst)
> +{
> + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
> + struct ingress_policer *policer;
> + int err = 0;
> +
> + /* Force to 0 if no rate specified,
> + * default to 1000 kbits if burst is 0,
> + * else stick with user-specified value.
> + */
> + policer_burst = (!policer_rate ? 0
> + : !policer_burst ? 1000
Please note, that in this patch here I'm considering to raise the
burst to 8000 kbits instead, (since the TC linux-dev version was
setting that before because of the *8 bug).
https://patchwork.ozlabs.org/patch/610408/
> + : policer_burst);
> +
> + ovs_mutex_lock(&netdev->mutex);
> +
> + policer = ovsrcu_get_protected(struct ingress_policer *,
> + &netdev->ingress_policer);
> +
> + if (netdev->policer_rate == policer_rate &&
> + netdev->policer_burst == policer_burst) {
> + /* Assume that settings haven't changed since we last set them. */
> + ovs_mutex_unlock(&netdev->mutex);
> + return err;
> + }
> +
> + /* Destroy any existing ingress policer for the device if one exists
> + * and the policer_rate and policer_burst are being set to 0
> + */
> + if ((policer != NULL) && (policer_rate == 0) && (policer_burst == 0)) {
> + /* User has set rate and burst to 0, destroy the policer */
> + ovsrcu_postpone(netdev_dpdk_policer_destruct__, policer);
> + ovsrcu_set(&netdev->ingress_policer, NULL);
> + netdev->policer_rate = 0;
> + netdev->policer_burst = 0;
> + ovs_mutex_unlock(&netdev->mutex);
> + return err;
> + }
> +
> + /* Destroy existing policer before adding new policer */
> + if (policer != NULL) {
> + ovsrcu_postpone(netdev_dpdk_policer_destruct__, policer);
> + ovsrcu_set(&netdev->ingress_policer, NULL);
> + netdev->policer_rate = 0;
> + netdev->policer_burst = 0;
> + }
> +
> + /* Add an ingress policer */
> + err = netdev_dpdk_policer_construct__(netdev_, policer_rate,
> + policer_burst);
> +
> + /* Update policer_rate and policer_burst for the netdev */
> + netdev->policer_rate = policer_rate;
> + netdev->policer_burst = policer_burst;
> + ovs_mutex_unlock(&netdev->mutex);
> + return err;
> +}
> +
> +static int
> netdev_dpdk_get_ifindex(const struct netdev *netdev)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> @@ -2250,6 +2407,12 @@ netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
> return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
> }
>
> +struct ingress_policer *
> +netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
> +{
> + return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
> +}
> +
> /*
> * These callbacks allow virtio-net devices to be added to vhost ports when
> * configuration has been fully complete.
> @@ -2703,7 +2866,7 @@ static const struct dpdk_qos_ops egress_policer_ops = {
> GET_FEATURES, \
> NULL, /* set_advertisements */ \
> \
> - NULL, /* set_policing */ \
> + netdev_dpdk_set_policing, /* set_policing */ \
> netdev_dpdk_get_qos_types, \
> NULL, /* get_qos_capabilities */ \
> netdev_dpdk_get_qos, \
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 7d6976f..a7d2391 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -2388,8 +2388,8 @@
> table="Queue"/> tables).
> </p>
> <p>
> - Policing is currently implemented only on Linux. The Linux
> - implementation uses a simple ``token bucket'' approach:
> + Policing is currently implemented on Linux and OVS with DPDK. Both
> + implementations use a simple ``token bucket'' approach:
> </p>
> <ul>
> <li>
> --
> 1.7.4.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list