[ovs-dev] [PATCH ovs v2 1/4] dpif-netdev: Expand the meters supported number

xiangxia.m.yue at gmail.com xiangxia.m.yue at gmail.com
Wed May 13 13:31:32 UTC 2020


From: Tonghao Zhang <xiangxia.m.yue at gmail.com>

Currently, ovs-vswitchd uses an array of dp_meter structs to store
meter data, so at most 65536 (defined by MAX_METERS) meters can be
used. But in some cases, for example on an edge gateway, we need at
least 200,000 meters for per-IP-address bandwidth limiting, because
every IP address uses two meters, one for its rx path and one for
its tx path[1]. In addition, ovs-vswitchd should support meter offload
(the rte_mtr_xxx API introduced by DPDK), and some hardware, such as
the Mellanox ConnectX-6, provides more than 65536 meters.

This patch still uses an array to manage the meters, but the array can now
be expanded.

[1].
$ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
$ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0

Cc: Ilya Maximets <i.maximets at ovn.org>
Cc: William Tu <u9012063 at gmail.com>
Cc: Jarno Rajahalme <jarno at ovn.org>
Cc: Ben Pfaff <blp at ovn.org>
Cc: Andy Zhou <azhou at ovn.org>
Cc: Pravin Shelar <pshelar at ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com>
---
v2:
* add comments for dp_meter_instance
* change the log
* remove extra newline
* I didn't move dp_netdev_meter_init/destroy up, because they depend
  on other meter functions, and keeping all the meter functions
  together makes the code cleaner.
---
 lib/dpif-netdev.c | 319 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 250 insertions(+), 69 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ef14e83b5f06..b5deaab31eb0 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
-enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
-enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
-enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
+
+/* Maximum number of meters in the table. */
+#define METER_ENTRY_MAX (200000ULL)
+/* Maximum number of bands / meter. */
+#define METER_BAND_MAX  (8)
+#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
 
 COVERAGE_DEFINE(datapath_drop_meter);
 COVERAGE_DEFINE(datapath_drop_upcall_error);
@@ -283,12 +286,26 @@ struct dp_meter {
     uint16_t flags;
     uint16_t n_bands;
     uint32_t max_delta_t;
+    uint32_t id;
+    struct ovs_mutex lock;
     uint64_t used;
     uint64_t packet_count;
     uint64_t byte_count;
     struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+    uint32_t n_meters;
+    /* 'n_meters' RCU-protected meter pointers; a NULL slot is unused. */
+    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
+};
+
+struct dp_meter_table {
+    OVSRCU_TYPE(struct dp_meter_instance *) ti;
+    uint32_t count;
+    struct ovs_mutex lock;
+};
+
 struct pmd_auto_lb {
     bool auto_lb_requested;     /* Auto load balancing requested by user. */
     bool is_enabled;            /* Current status of Auto load balancing. */
@@ -329,8 +346,7 @@ struct dp_netdev {
     atomic_uint32_t tx_flush_interval;
 
     /* Meters. */
-    struct ovs_mutex meter_locks[N_METER_LOCKS];
-    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
+    struct dp_meter_table meter_tbl;
 
     /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
     OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
@@ -378,19 +394,6 @@ struct dp_netdev {
     struct pmd_auto_lb pmd_alb;
 };
 
-static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-
 static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                     odp_port_t)
     OVS_REQUIRES(dp->port_mutex);
@@ -1523,6 +1526,9 @@ choose_port(struct dp_netdev *dp, const char *name)
     return ODPP_NONE;
 }
 
+static void dp_netdev_meter_init(struct dp_meter_table *tbl);
+static void dp_netdev_meter_destroy(struct dp_meter_table *tbl);
+
 static int
 create_dp_netdev(const char *name, const struct dpif_class *class,
                  struct dp_netdev **dpp)
@@ -1556,9 +1562,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
     dp->reconfigure_seq = seq_create();
     dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
 
-    for (int i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_init(&dp->meter_tbl);
 
     /* Disable upcalls by default. */
     dp_netdev_disable_upcall(dp);
@@ -1647,16 +1651,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
     fat_rwlock_destroy(&dp->upcall_rwlock);
 }
 
-static void
-dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
-    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    if (dp->meters[meter_id]) {
-        free(dp->meters[meter_id]);
-        dp->meters[meter_id] = NULL;
-    }
-}
-
 /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
  * through the 'dp_netdevs' shash while freeing 'dp'. */
 static void
@@ -1694,16 +1688,7 @@ dp_netdev_free(struct dp_netdev *dp)
     /* Upcalls must be disabled at this point */
     dp_netdev_destroy_upcall_lock(dp);
 
-    int i;
-
-    for (i = 0; i < MAX_METERS; ++i) {
-        meter_lock(dp, i);
-        dp_delete_meter(dp, i);
-        meter_unlock(dp, i);
-    }
-    for (i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_destroy(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_destroy(&dp->meter_tbl);
 
     free(dp->pmd_cmask);
     free(CONST_CAST(char *, dp->name));
@@ -5713,14 +5698,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp)
 
 
 /* Meters */
+static uint32_t
+meter_hash(struct dp_meter_instance *ti, uint32_t id)
+{
+    uint32_t n_meters = ti->n_meters;
+
+    return id % n_meters;
+}
+
+static void
+dp_meter_free(struct dp_meter *meter)
+{
+    ovs_mutex_destroy(&meter->lock);
+    free(meter);
+}
+
+static struct dp_meter_instance *
+dp_meter_instance_alloc(const uint32_t size)
+{
+    struct dp_meter_instance *ti;
+
+    ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size);
+    ti->n_meters = size;
+
+    return ti;
+}
+
+/* Replaces the table's array with one of 'size' slots, placing every meter
+ * at id % 'size' so that dp_meter_lookup() still finds it after a resize. */
+static void
+dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
+{
+    struct dp_meter_instance *new_ti = dp_meter_instance_alloc(size);
+    struct dp_meter_instance *ti;
+    uint32_t i;
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
+    for (i = 0; i < ti->n_meters; i++) {
+        struct dp_meter *meter;
+
+        meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]);
+        if (meter) {
+            ovsrcu_set(&new_ti->dp_meters[meter_hash(new_ti, meter->id)],
+                       meter);
+        }
+    }
+    ovsrcu_set(&tbl->ti, new_ti);
+    ovsrcu_postpone(free, ti);
+}
+
+static void
+dp_meter_instance_insert(struct dp_meter_instance *ti,
+                         struct dp_meter *meter)
+{
+    uint32_t hash;
+
+    hash = meter_hash(ti, meter->id);
+    ovsrcu_set(&ti->dp_meters[hash], meter);
+}
+
+static void
+dp_meter_instance_remove(struct dp_meter_instance *ti,
+                         struct dp_meter *meter)
+{
+    uint32_t hash;
+
+    hash = meter_hash(ti, meter->id);
+    ovsrcu_set(&ti->dp_meters[hash], NULL);
+}
+
+static void
+dp_netdev_meter_init(struct dp_meter_table *tbl)
+{
+    struct dp_meter_instance *ti;
+
+    ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+    ovsrcu_set(&tbl->ti, ti);
+
+    ovs_mutex_init(&tbl->lock);
+    tbl->count = 0;
+}
+
+static void
+dp_netdev_meter_destroy(struct dp_meter_table *tbl)
+{
+    struct dp_meter_instance *ti;
+    int i;
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
+    for (i = 0; i < ti->n_meters; i++) {
+        struct dp_meter *meter;
+
+        meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]);
+        if (meter) {
+            ovsrcu_postpone(dp_meter_free, meter);
+        }
+    }
+
+    ovsrcu_postpone(free, ti);
+    ovs_mutex_destroy(&tbl->lock);
+}
+
+static struct dp_meter *
+dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id)
+{
+    struct dp_meter_instance *ti;
+    struct dp_meter *meter;
+    uint32_t hash;
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
+    hash = meter_hash(ti, meter_id);
+
+    meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]);
+    if (meter && meter->id == meter_id) {
+        return meter;
+    }
+
+    return NULL;
+}
+
+static void
+dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id)
+                     OVS_REQUIRES(meter_tbl->lock)
+{
+    struct dp_meter_instance *ti;
+    struct dp_meter *meter;
+
+    meter = dp_meter_lookup(meter_tbl, meter_id);
+    if (!meter) {
+        return;
+    }
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
+    dp_meter_instance_remove(ti, meter);
+    ovsrcu_postpone(dp_meter_free, meter);
+
+    meter_tbl->count--;
+    /* Shrink the meter array if necessary. */
+    if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
+        meter_tbl->count <= (ti->n_meters / 4)) {
+        int half_size = ti->n_meters / 2;
+        int i;
+
+        /* Shrinking keeps every meter in its current slot, so it is only
+         * possible when the upper half of the array, which is about to be
+         * released, holds no meters.
+         */
+        for (i = half_size; i < ti->n_meters; i++) {
+            if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
+                return;
+            }
+        }
+
+        dp_meter_instance_realloc(meter_tbl, half_size);
+    }
+}
+
+/* Stores 'meter' in slot meter->id % n_meters and doubles the array once the
+ * meter count reaches its size.  Returns 0, or EBUSY if the slot is taken. */
+static int
+dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter)
+                OVS_REQUIRES(meter_tbl->lock)
+{
+    struct dp_meter_instance *ti;
+    uint32_t hash;
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
+    hash = meter_hash(ti, meter->id);
+    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
+                                &ti->dp_meters[hash]))) {
+        VLOG_WARN("Failed to attach meter id %u to slot %u/%u.",
+                  meter->id, hash, ti->n_meters);
+        return EBUSY;
+    }
+
+    dp_meter_instance_insert(ti, meter);
+    meter_tbl->count++;
+    if (meter_tbl->count >= ti->n_meters) {
+        dp_meter_instance_realloc(meter_tbl, ti->n_meters * 2);
+    }
+
+    return 0;
+}
+
 static void
 dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
                                struct ofputil_meter_features *features)
 {
-    features->max_meters = MAX_METERS;
+    features->max_meters = METER_ENTRY_MAX;
     features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
     features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
-    features->max_bands = MAX_BANDS;
+    features->max_bands = METER_BAND_MAX;
     features->max_color = 0;
 }
 
@@ -5742,14 +5910,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     uint32_t exceeded_rate[NETDEV_MAX_BURST];
     int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return;
     }
 
-    meter_lock(dp, meter_id);
-    meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
     if (!meter) {
-        goto out;
+        return;
     }
 
     /* Initialize as negative values. */
@@ -5757,6 +5924,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     /* Initialize as zeroes. */
     memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
 
+    ovs_mutex_lock(&meter->lock);
     /* All packets will hit the meter at the same time. */
     long_delta_t = now / 1000 - meter->used / 1000; /* msec */
 
@@ -5874,8 +6042,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
             dp_packet_batch_refill(packets_, packet, j);
         }
     }
- out:
-    meter_unlock(dp, meter_id);
+
+    ovs_mutex_unlock(&meter->lock);
 }
 
 /* Meter set/get/del processing is still single-threaded. */
@@ -5884,11 +6052,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
                       struct ofputil_meter_config *config)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
     uint32_t mid = meter_id.uint32;
     struct dp_meter *meter;
-    int i;
+    int err, i;
 
-    if (mid >= MAX_METERS) {
+    if (mid >= METER_ENTRY_MAX) {
         return EFBIG; /* Meter_id out of range. */
     }
 
@@ -5896,7 +6065,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         return EBADF; /* Unsupported flags set */
     }
 
-    if (config->n_bands > MAX_BANDS) {
+    if (config->n_bands > METER_BAND_MAX) {
         return EINVAL;
     }
 
@@ -5917,6 +6086,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
     meter->n_bands = config->n_bands;
     meter->max_delta_t = 0;
     meter->used = time_usec();
+    meter->id = mid;
+    ovs_mutex_init(&meter->lock);
 
     /* set up bands */
     for (i = 0; i < config->n_bands; ++i) {
@@ -5942,12 +6113,22 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         }
     }
 
-    meter_lock(dp, mid);
-    dp_delete_meter(dp, mid); /* Free existing meter, if any */
-    dp->meters[mid] = meter;
-    meter_unlock(dp, mid);
+    ovs_mutex_lock(&meter_tbl->lock);
+
+    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
+    err = dp_meter_attach(meter_tbl, meter);
+    if (err) {
+        goto unlock_out;
+    }
+
+    ovs_mutex_unlock(&meter_tbl->lock);
 
     return 0;
+
+unlock_out:
+    ovs_mutex_unlock(&meter_tbl->lock);
+    dp_meter_free(meter);
+    return err;
 }
 
 static int
@@ -5955,23 +6136,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
                       ofproto_meter_id meter_id_,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
-    const struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev *dp = get_dp_netdev(dpif);
     uint32_t meter_id = meter_id_.uint32;
-    int retval = 0;
+    const struct dp_meter *meter;
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return EFBIG;
     }
 
-    meter_lock(dp, meter_id);
-    const struct dp_meter *meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
     if (!meter) {
-        retval = ENOENT;
-        goto done;
+        return ENOENT;
     }
+
     if (stats) {
         int i = 0;
 
+        ovs_mutex_lock(&meter->lock);
         stats->packet_in_count = meter->packet_count;
         stats->byte_in_count = meter->byte_count;
 
@@ -5979,13 +6160,12 @@ dpif_netdev_meter_get(const struct dpif *dpif,
             stats->bands[i].packet_count = meter->bands[i].packet_count;
             stats->bands[i].byte_count = meter->bands[i].byte_count;
         }
+        ovs_mutex_unlock(&meter->lock);
 
         stats->n_bands = i;
     }
 
-done:
-    meter_unlock(dp, meter_id);
-    return retval;
+    return 0;
 }
 
 static int
@@ -5994,15 +6174,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
     int error;
 
     error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
     if (!error) {
         uint32_t meter_id = meter_id_.uint32;
 
-        meter_lock(dp, meter_id);
-        dp_delete_meter(dp, meter_id);
-        meter_unlock(dp, meter_id);
+        ovs_mutex_lock(&meter_tbl->lock);
+        dp_meter_detach_free(meter_tbl, meter_id);
+        ovs_mutex_unlock(&meter_tbl->lock);
     }
     return error;
 }
-- 
2.26.1



More information about the dev mailing list