[ovs-dev] [PATCH v1 2/2] netdev-dpdk.c: Add ingress-policing functionality.

Ian Stokes ian.stokes at intel.com
Wed Apr 13 14:48:12 UTC 2016


This patch provides the modifications required in netdev-dpdk.c and
vswitch.xml to enable ingress policing for DPDK interfaces.

This patch implements the necessary netdev functions to netdev-dpdk.c as
well as various helper functions required for ingress policing.

The vswitch.xml has been modified to explain the expected parameters and
behaviour when using ingress policing.

The INSTALL.DPDK.md guide has been modified to provide an example
configuration of ingress policing.

Signed-off-by: Ian Stokes <ian.stokes at intel.com>
---

v1:

*INSTALL.DPDK.md
- Add ingress_policing usage example.

*NEWS
- Add support for ingress_policing to DPDK section.

*netdev-dpdk.c
- Add ingress_policer struct.
- Modify struct netdev-dpdk to include a ovsrcu pointer to an ingress
  policer.
- Modify netdev_dpdk_init() to initialise ingress policer to null.
- Add function 'ingress_policer_run()'to process packets with a given
  netdev, rte_mbuf and packet count.
- Modified 'netdev_dpdk_vhost_update_rx_counters()' to accept new
  parameter int dropped representing number of packets dropped.
- Modified 'netdev_dpdk_vhost_update_rx_counters()' to update number of
  dropped packets in stats->rx_dropped.
- Modified 'netdev_dpdk_vhost_rxq_recv()' to check and call
  ingress_policing functionality with 'ingress_policer_run()'.
- Modified 'netdev_dpdk_rxq_recv()' to check and call
  ingress_policing functionality with 'ingress_policer_run()'.
- Modified 'netdev_dpdk_rxq_recv()' to update rx dropped stats.
- Modified 'netdev_dpdk_vhost_get_stats()' to add support for rx_dropped.
- Add function 'netdev_dpdk_policer_construct__()' to create an
  ingress_policer.
- Add function 'netdev_dpdk_policer_destruct__()' to destroy an ingress
  policer.
- Add function 'netdev_dpdk_set_policing()' responsible for calling
  helper functions to create and destroy an ingress policer for the
  device.
- Add function 'netdev_dpdk_get_ingress_policer()' to return a pointer
  to the ingress_policer using the ovsrcu_get function.

*vswitch.xml
- Modify ingress policing section to include support in OVS with DPDK.
---
 INSTALL.DPDK.md      |   19 ++++++
 NEWS                 |    1 +
 lib/netdev-dpdk.c    |  173 ++++++++++++++++++++++++++++++++++++++++++++++++--
 vswitchd/vswitch.xml |    4 +-
 4 files changed, 190 insertions(+), 7 deletions(-)

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index 9ec8bf6..df2c6cd 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -227,6 +227,25 @@ Using the DPDK with ovs-vswitchd:
    For more details regarding egress-policer parameters please refer to the
    vswitch.xml.
 
+9. Ingress Policing Example
+
+   Assuming you have a vhost-user port receiving traffic consisting of
+   packets of size 64 bytes, the following command would limit the reception
+   rate of the port to ~1,000,000 packets per second:
+
+   `ovs-vsctl set interface vhost-user0 ingress_policing_rate=368000
+    ingress_policing_burst=1000`
+
+   To examine the ingress policer configuration of the port:
+
+   `ovs-vsctl list interface vhost-user0`
+
+   To clear the ingress policer configuration from the port use the following:
+
+   `ovs-vsctl set interface vhost-user0 ingress_policing_rate=0`
+
+   For more details regarding ingress-policer see the vswitch.xml.
+
 Performance Tuning:
 -------------------
 
diff --git a/NEWS b/NEWS
index ea7f3a1..d0283fa 100644
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,7 @@ Post-v2.5.0
        assignment.
      * Type of log messages from PMD threads changed from INFO to DBG.
      * QoS functionality with sample egress-policer implementation.
+     * Add ingress policing functionality.
    - ovs-benchmark: This utility has been removed due to lack of use and
      bitrot.
    - ovs-appctl:
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index fe9c9cb..2d955d3 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -297,6 +297,12 @@ struct dpdk_ring {
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
 };
 
+struct ingress_policer {
+    struct rte_meter_srtcm_params app_srtcm_params;
+    struct rte_meter_srtcm in_policer;
+    rte_spinlock_t policer_lock;
+};
+
 struct netdev_dpdk {
     struct netdev up;
     int port_id;
@@ -342,6 +348,10 @@ struct netdev_dpdk {
     struct qos_conf *qos_conf;
     rte_spinlock_t qos_lock;
 
+    /* Ingress Policer */
+    OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
+    uint32_t policer_rate;
+    uint32_t policer_burst;
 };
 
 struct netdev_rxq_dpdk {
@@ -355,6 +365,9 @@ static int netdev_dpdk_construct(struct netdev *);
 
 struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
 
+struct ingress_policer *
+netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
+
 static bool
 is_dpdk_class(const struct netdev_class *class)
 {
@@ -721,6 +734,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     dev->qos_conf = NULL;
     rte_spinlock_init(&dev->qos_lock);
 
+    /* Initialise rcu pointer for ingress policer to NULL */
+    ovsrcu_init(&dev->ingress_policer, NULL);
+    dev->policer_rate = 0;
+    dev->policer_burst = 0;
+
     netdev->n_txq = NR_QUEUE;
     netdev->n_rxq = NR_QUEUE;
     netdev->requested_n_rxq = NR_QUEUE;
@@ -1152,6 +1170,22 @@ netdev_dpdk_policer_run__(struct rte_meter_srtcm *meter,
     return cnt;
 }
 
+static int
+ingress_policer_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
+                        int pkt_cnt)
+{
+    int cnt = 0;
+    struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
+
+    if (policer) {
+        rte_spinlock_lock(&policer->policer_lock);
+        cnt = netdev_dpdk_policer_run__(&policer->in_policer, pkts, pkt_cnt);
+        rte_spinlock_unlock(&policer->policer_lock);
+    }
+
+    return cnt;
+}
+
 static bool
 is_vhost_running(struct virtio_net *virtio_dev)
 {
@@ -1160,12 +1194,14 @@ is_vhost_running(struct virtio_net *virtio_dev)
 
 static inline void
 netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
-                                     struct dp_packet **packets, int count)
+                                     struct dp_packet **packets, int count,
+                                     int dropped)
 {
     int i;
     struct dp_packet *packet;
 
     stats->rx_packets += count;
+    stats->rx_dropped += dropped;
     for (i = 0; i < count; i++) {
         packet = packets[i];
 
@@ -1197,7 +1233,9 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
     struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
     struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
     int qid = rxq->queue_id;
+    struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
     uint16_t nb_rx = 0;
+    uint16_t dropped = 0;
 
     if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
         return EAGAIN;
@@ -1215,8 +1253,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
         return EAGAIN;
     }
 
+    if (policer != NULL) {
+        dropped = nb_rx;
+        nb_rx = ingress_policer_run(dev, (struct rte_mbuf **)packets, nb_rx);
+        dropped -= nb_rx;
+    }
+
     rte_spinlock_lock(&dev->stats_lock);
-    netdev_dpdk_vhost_update_rx_counters(&dev->stats, packets, nb_rx);
+    netdev_dpdk_vhost_update_rx_counters(&dev->stats, packets, nb_rx, dropped);
     rte_spinlock_unlock(&dev->stats_lock);
 
     *c = (int) nb_rx;
@@ -1229,7 +1273,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
 {
     struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
     struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
+    struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
     int nb_rx;
+    int dropped = 0;
 
     /* There is only one tx queue for this core.  Do not flush other
      * queues.
@@ -1247,6 +1293,19 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
         return EAGAIN;
     }
 
+    if (policer != NULL) {
+        dropped = nb_rx;
+        nb_rx = ingress_policer_run(dev, (struct rte_mbuf **) packets, nb_rx);
+        dropped -= nb_rx;
+    }
+
+    /* Update stats to reflect dropped packets */
+    if (OVS_UNLIKELY(dropped)) {
+        rte_spinlock_lock(&dev->stats_lock);
+        dev->stats.rx_dropped += dropped;
+        rte_spinlock_unlock(&dev->stats_lock);
+    }
+
     *c = nb_rx;
 
     return 0;
@@ -1689,13 +1748,13 @@ netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
     stats->tx_fifo_errors = UINT64_MAX;
     stats->tx_heartbeat_errors = UINT64_MAX;
     stats->tx_window_errors = UINT64_MAX;
-    stats->rx_dropped += UINT64_MAX;
 
     rte_spinlock_lock(&dev->stats_lock);
     /* Supported Stats */
     stats->rx_packets += dev->stats.rx_packets;
     stats->tx_packets += dev->stats.tx_packets;
     stats->tx_dropped += dev->stats.tx_dropped;
+    stats->rx_dropped = dev->stats.rx_dropped;
     stats->multicast = dev->stats.multicast;
     stats->rx_bytes = dev->stats.rx_bytes;
     stats->tx_bytes = dev->stats.tx_bytes;
@@ -1736,7 +1795,7 @@ netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
 
     /* These are the available DPDK counters for packets not received due to
      * local resource constraints in DPDK and NIC respectively. */
-    stats->rx_dropped = rte_stats.rx_nombuf + rte_stats.imissed;
+    stats->rx_dropped += rte_stats.rx_nombuf + rte_stats.imissed;
     stats->collisions = UINT64_MAX;
 
     stats->rx_length_errors = UINT64_MAX;
@@ -1804,6 +1863,104 @@ netdev_dpdk_get_features(const struct netdev *netdev,
 }
 
 static int
+netdev_dpdk_policer_construct__(struct netdev *netdev_, uint32_t rate,
+                                uint32_t burst)
+{
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+    struct ingress_policer *policer;
+    uint64_t rate_bytes;
+    uint64_t burst_bytes;
+    int err = 0;
+
+    policer = xmalloc(sizeof *policer);
+    rte_spinlock_init(&policer->policer_lock);
+
+    /* rte_meter requires bytes so convert kbits rate and burst to bytes. */
+    rate_bytes = rate * 125;
+    burst_bytes = burst * 125;
+
+    policer->app_srtcm_params.cir = rate_bytes;
+    policer->app_srtcm_params.cbs = burst_bytes;
+    policer->app_srtcm_params.ebs = 0;
+    err = rte_meter_srtcm_config(&policer->in_policer,
+                                    &policer->app_srtcm_params);
+    ovsrcu_set(&netdev->ingress_policer, policer);
+
+    return err;
+}
+
+static int netdev_dpdk_policer_destruct__(struct ingress_policer * policer)
+{
+    if (policer == NULL) {
+        return 0;
+    }
+    else {
+        free(policer);
+    }
+    return 0;
+}
+
+static int
+netdev_dpdk_set_policing(struct netdev* netdev_, uint32_t policer_rate,
+                            uint32_t policer_burst)
+{
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+    struct ingress_policer *policer;
+    int err = 0;
+
+    /* Force to 0 if no rate specified,
+     * default to 1000 kbits if burst is 0,
+     * else stick with user-specified value.
+     */
+    policer_burst = (!policer_rate ? 0
+                    : !policer_burst ? 1000
+                    : policer_burst);
+
+    ovs_mutex_lock(&netdev->mutex);
+
+    policer = ovsrcu_get_protected(struct ingress_policer *,
+                                    &netdev->ingress_policer);
+
+    if (netdev->policer_rate == policer_rate &&
+    netdev->policer_burst == policer_burst) {
+        /* Assume that settings haven't changed since we last set them. */
+        ovs_mutex_unlock(&netdev->mutex);
+        return err;
+    }
+
+    /* Destroy any existing ingress policer for the device if one exists
+     * and the policer_rate and policer_burst are being set to 0
+     */
+    if ((policer != NULL) && (policer_rate == 0) && (policer_burst == 0)) {
+        /* User has set rate and burst to 0, destroy the policer */
+        ovsrcu_postpone(netdev_dpdk_policer_destruct__, policer);
+        ovsrcu_set(&netdev->ingress_policer, NULL);
+        netdev->policer_rate = 0;
+        netdev->policer_burst = 0;
+        ovs_mutex_unlock(&netdev->mutex);
+        return err;
+    }
+
+    /* Destroy existing policer before adding new policer */
+    if (policer != NULL) {
+        ovsrcu_postpone(netdev_dpdk_policer_destruct__, policer);
+        ovsrcu_set(&netdev->ingress_policer, NULL);
+        netdev->policer_rate = 0;
+        netdev->policer_burst = 0;
+    }
+
+    /* Add an ingress policer */
+    err = netdev_dpdk_policer_construct__(netdev_, policer_rate,
+                                            policer_burst);
+
+    /* Update policer_rate and policer_burst for the netdev */
+    netdev->policer_rate = policer_rate;
+    netdev->policer_burst = policer_burst;
+    ovs_mutex_unlock(&netdev->mutex);
+    return err;
+}
+
+static int
 netdev_dpdk_get_ifindex(const struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
@@ -2250,6 +2407,12 @@ netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
     return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
 }
 
+struct ingress_policer *
+netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
+{
+    return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
+}
+
 /*
  * These callbacks allow virtio-net devices to be added to vhost ports when
  * configuration has been fully complete.
@@ -2703,7 +2866,7 @@ static const struct dpdk_qos_ops egress_policer_ops = {
     GET_FEATURES,                                             \
     NULL,                       /* set_advertisements */      \
                                                               \
-    NULL,                       /* set_policing */            \
+    netdev_dpdk_set_policing,   /* set_policing */            \
     netdev_dpdk_get_qos_types,                                \
     NULL,                       /* get_qos_capabilities */    \
     netdev_dpdk_get_qos,                                      \
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 7d6976f..a7d2391 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -2388,8 +2388,8 @@
         table="Queue"/> tables).
       </p>
       <p>
-        Policing is currently implemented only on Linux.  The Linux
-        implementation uses a simple ``token bucket'' approach:
+        Policing is currently implemented on Linux and OVS with DPDK. Both
+        implementations use a simple ``token bucket'' approach:
       </p>
       <ul>
         <li>
-- 
1.7.4.1




More information about the dev mailing list