[ovs-dev] [PATCH v3] dpif-netdev: Expand the meter capacity.

Tonghao Zhang xiangxia.m.yue at gmail.com
Fri Jun 11 08:33:34 UTC 2021


On Wed, May 12, 2021 at 5:18 PM <xiangxia.m.yue at gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>
> For now, ovs-vswitchd use the array of the dp_meter struct
> to store meter's data, and at most, there are only 65536
> (defined by MAX_METERS) meters that can be used. But in some
> case, for example, in the edge gateway, we should use 200,000+,
> at least, meters for IP address bandwidth limitation.
> Every one IP address will use two meters for its rx and tx
> path[1]. In other way, ovs-vswitchd should support meter-offload
> (rte_mtr_xxx api introduced by dpdk.), but there are more than
> 65536 meters in the hardware, such as Mellanox ConnectX-6.
>
> This patch use cmap to manage the meter, instead of the array.
>
> * Insertion performance, ovs-ofctl add-meter 1000+ meters,
>   the cmap takes abount 4000ms, as same as previous implementation.
> * Lookup performance in datapath, we add 1000+ meters which rate limit
>   are 10Gbps (the NIC cards are 10Gbps, so netdev-datapath will not
>   drop the packets.), and a flow which only forward packets from p0
>   to p1, with meter action[2]. On other machine, pktgen-dpdk will
>   generate 64B packets to p0.
>
>   The forwarding performance always is 1324 Kpps on my server
>   which CPU is Intel E5-2650, 2.00GHz.
>
> [1].
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
>
> [2].
> $ in_port=p0 action=meter:100,output:p1
>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com>
Hi, friendly ping
> ---
> v3:
> * update the commit message
> * remove dp_netdev_meter struct
> * remove create_dp_netdev function
> * don't use the hash_basis
> * use the meter_id as a hash instead of hash_xxx function. see *dp_meter_hash for details
> * fix coding style
> * v2: http://patchwork.ozlabs.org/project/openvswitch/patch/1584254601-7321-1-git-send-email-xiangxia.m.yue@gmail.com/
> ---
>  lib/dpif-netdev.c | 158 ++++++++++++++++++++++++++++------------------
>  1 file changed, 97 insertions(+), 61 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 251788b04965..f800c8c65e13 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -99,9 +99,8 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
>  #define DEFAULT_TX_FLUSH_INTERVAL 0
>
>  /* Configuration parameters. */
> -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> +enum { MAX_METERS = 1 << 18 };    /* Maximum number of meters. */
>  enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
>
>  COVERAGE_DEFINE(datapath_drop_meter);
>  COVERAGE_DEFINE(datapath_drop_upcall_error);
> @@ -287,6 +286,9 @@ struct dp_meter_band {
>  };
>
>  struct dp_meter {
> +    struct cmap_node node;
> +    struct ovs_mutex lock;
> +    uint32_t id;
>      uint16_t flags;
>      uint16_t n_bands;
>      uint32_t max_delta_t;
> @@ -339,8 +341,8 @@ struct dp_netdev {
>      atomic_uint32_t tx_flush_interval;
>
>      /* Meters. */
> -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> +    struct ovs_mutex meters_lock;
> +    struct cmap meters OVS_GUARDED;
>
>      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
>      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> @@ -392,19 +394,6 @@ struct dp_netdev {
>      struct cmap tx_bonds; /* Contains 'struct tx_bond'. */
>  };
>
> -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -
>  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
>                                                      odp_port_t)
>      OVS_REQUIRES(dp->port_mutex);
> @@ -1747,6 +1736,68 @@ choose_port(struct dp_netdev *dp, const char *name)
>      return ODPP_NONE;
>  }
>
> +static uint32_t
> +dp_meter_hash(uint32_t meter_id)
> +{
> +    /* In the ofproto-dpif layer, we use the id-pool
> +     * to alloc meter id orderly (e.g. 1, 2, ... N.),
> +     * which provide a better hash distribution.
> +     * Use them directly instead of hash_xxx function
> +     * for achieving high-performance.
> +     */
> +    return meter_id;
> +}
> +
> +static void
> +dp_netdev_meter_destroy(struct dp_netdev *dp)
> +{
> +    struct dp_meter *m;
> +
> +    ovs_mutex_lock(&dp->meters_lock);
> +    CMAP_FOR_EACH (m, node, &dp->meters) {
> +        cmap_remove(&dp->meters, &m->node, dp_meter_hash(m->id));
> +        ovsrcu_postpone(free, m);
> +    }
> +
> +    cmap_destroy(&dp->meters);
> +    ovs_mutex_unlock(&dp->meters_lock);
> +
> +    ovs_mutex_destroy(&dp->meters_lock);
> +}
> +
> +static struct dp_meter*
> +dp_meter_lookup(struct cmap *meters, uint32_t meter_id)
> +{
> +    uint32_t hash = dp_meter_hash(meter_id);
> +    struct dp_meter *m;
> +
> +    CMAP_FOR_EACH_WITH_HASH (m, node, hash, meters) {
> +        if (m->id == meter_id) {
> +            return m;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +dp_meter_detach_free(struct cmap *meters, uint32_t meter_id)
> +{
> +    struct dp_meter *m;
> +
> +    m = dp_meter_lookup(meters, meter_id);
> +    if (m) {
> +            cmap_remove(meters, &m->node, dp_meter_hash(meter_id));
> +            ovsrcu_postpone(free, m);
> +    }
> +}
> +
> +static void
> +dp_meter_attach(struct cmap *meters, struct dp_meter *meter)
> +{
> +    cmap_insert(meters, &meter->node, dp_meter_hash(meter->id));
> +}
> +
>  static int
>  create_dp_netdev(const char *name, const struct dpif_class *class,
>                   struct dp_netdev **dpp)
> @@ -1783,9 +1834,9 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>      dp->reconfigure_seq = seq_create();
>      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
>
> -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> -    }
> +    /* Init meter resources. */
> +    cmap_init(&dp->meters);
> +    ovs_mutex_init(&dp->meters_lock);
>
>      /* Disable upcalls by default. */
>      dp_netdev_disable_upcall(dp);
> @@ -1874,16 +1925,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
>      fat_rwlock_destroy(&dp->upcall_rwlock);
>  }
>
> -static void
> -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    if (dp->meters[meter_id]) {
> -        free(dp->meters[meter_id]);
> -        dp->meters[meter_id] = NULL;
> -    }
> -}
> -
>  static uint32_t
>  hash_bond_id(uint32_t bond_id)
>  {
> @@ -1938,16 +1979,7 @@ dp_netdev_free(struct dp_netdev *dp)
>      /* Upcalls must be disabled at this point */
>      dp_netdev_destroy_upcall_lock(dp);
>
> -    int i;
> -
> -    for (i = 0; i < MAX_METERS; ++i) {
> -        meter_lock(dp, i);
> -        dp_delete_meter(dp, i);
> -        meter_unlock(dp, i);
> -    }
> -    for (i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_destroy(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_destroy(dp);
>
>      free(dp->pmd_cmask);
>      free(CONST_CAST(char *, dp->name));
> @@ -6175,10 +6207,9 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>          return;
>      }
>
> -    meter_lock(dp, meter_id);
> -    meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meters, meter_id);
>      if (!meter) {
> -        goto out;
> +        return;
>      }
>
>      /* Initialize as negative values. */
> @@ -6186,6 +6217,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>      /* Initialize as zeroes. */
>      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
>
> +    ovs_mutex_lock(&meter->lock);
>      /* All packets will hit the meter at the same time. */
>      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
>
> @@ -6301,8 +6333,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>              dp_packet_batch_refill(packets_, packet, j);
>          }
>      }
> - out:
> -    meter_unlock(dp, meter_id);
> +
> +    ovs_mutex_unlock(&meter->lock);
>  }
>
>  /* Meter set/get/del processing is still single-threaded. */
> @@ -6344,6 +6376,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>      meter->n_bands = config->n_bands;
>      meter->max_delta_t = 0;
>      meter->used = time_usec();
> +    meter->id = mid;
> +    ovs_mutex_init(&meter->lock);
>
>      /* set up bands */
>      for (i = 0; i < config->n_bands; ++i) {
> @@ -6368,10 +6402,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>          }
>      }
>
> -    meter_lock(dp, mid);
> -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> -    dp->meters[mid] = meter;
> -    meter_unlock(dp, mid);
> +    ovs_mutex_lock(&dp->meters_lock);
> +
> +    dp_meter_detach_free(&dp->meters, mid); /* Free existing meter, if any */
> +    dp_meter_attach(&dp->meters, meter);
> +
> +    ovs_mutex_unlock(&dp->meters_lock);
>
>      return 0;
>  }
> @@ -6381,23 +6417,24 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>                        ofproto_meter_id meter_id_,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
> -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev *dp = get_dp_netdev(dpif);
>      uint32_t meter_id = meter_id_.uint32;
> -    int retval = 0;
> +    const struct dp_meter *meter;
>
>      if (meter_id >= MAX_METERS) {
>          return EFBIG;
>      }
>
> -    meter_lock(dp, meter_id);
> -    const struct dp_meter *meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meters, meter_id);
>      if (!meter) {
> -        retval = ENOENT;
> -        goto done;
> +        return ENOENT;
>      }
> +
>      if (stats) {
>          int i = 0;
>
> +        ovs_mutex_lock(&meter->lock);
> +
>          stats->packet_in_count = meter->packet_count;
>          stats->byte_in_count = meter->byte_count;
>
> @@ -6406,12 +6443,11 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>              stats->bands[i].byte_count = meter->bands[i].byte_count;
>          }
>
> +        ovs_mutex_unlock(&meter->lock);
>          stats->n_bands = i;
>      }
>
> -done:
> -    meter_unlock(dp, meter_id);
> -    return retval;
> +    return 0;
>  }
>
>  static int
> @@ -6426,9 +6462,9 @@ dpif_netdev_meter_del(struct dpif *dpif,
>      if (!error) {
>          uint32_t meter_id = meter_id_.uint32;
>
> -        meter_lock(dp, meter_id);
> -        dp_delete_meter(dp, meter_id);
> -        meter_unlock(dp, meter_id);
> +        ovs_mutex_lock(&dp->meters_lock);
> +        dp_meter_detach_free(&dp->meters, meter_id);
> +        ovs_mutex_unlock(&dp->meters_lock);
>      }
>      return error;
>  }
> --
> 2.27.0
>


-- 
Best regards, Tonghao


More information about the dev mailing list