[ovs-dev] [PATCH RFC] sflow: Export OVS PMD threads statistics via sFlow
Neil McKee
neil.mckee at inmon.com
Tue Jul 5 17:35:41 UTC 2016
Robert,
Two comments:
(1) To add an sFlow extension like this, you tag it with an
IANA-assigned enterprise number from:
https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
The enterprise number goes in the high 20 bits, as described on page
25 of http://sflow.org/sflow_version_5.txt
So instead of:
SFLCOUNTERS_OVSPMD = 2208
You need something like:
#define IANA_ENTERPRISE_VMWARE 6876
SFLCOUNTERS_OVSPMD = (IANA_ENTERPRISE_VMWARE << 12) + 2208
Or perhaps:
#define IANA_ENTERPRISE_INTEL 343
SFLCOUNTERS_OVSPMD = (IANA_ENTERPRISE_INTEL << 12) + 2208
And the "2208" could be "1" since this would be the first structure
from that enterprise (I think).
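For reference, a tag built that way unpacks like this (just an
illustrative sketch -- these macro names are mine, not anything from
sflow.h):

/* sFlow v5 data_format: enterprise in the top 20 bits, format in the low 12. */
#define SFL_DATA_FORMAT(enterprise, format)  (((enterprise) << 12) | ((format) & 0xFFF))
#define SFL_DATA_FORMAT_ENTERPRISE(tag)      ((tag) >> 12)
#define SFL_DATA_FORMAT_FORMAT(tag)          ((tag) & 0xFFF)

/* e.g. SFL_DATA_FORMAT(IANA_ENTERPRISE_INTEL, 1) == (343 << 12) + 1 == 1404929,
 *      SFL_DATA_FORMAT_ENTERPRISE(1404929) == 343,
 *      SFL_DATA_FORMAT_FORMAT(1404929) == 1. */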
(2) It would be better to export a summary instead. For example:
struct dpif_pmd_stats {
    unsigned num_threads;        /* Number of PMD threads */
    uint64_t cycles_polling;     /* Number of PMD thread polling cycles */
    uint64_t cycles_processing;  /* Number of PMD thread processing cycles */
    uint64_t cycles_total;       /* Number of all PMD thread cycles */
};
This would make the data easier to digest and act upon downstream --
remember, there may be many thousands of agents sending this every 20
seconds. It would also avoid a potential MTU problem if a large number
of threads ever pushed a single counter sample past 1500 bytes. Of
course, the particular summary you choose depends on what
decision-making you want to drive with this feed.
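To put a number on the MTU point: each per-thread record in this patch
is 32 bytes on the wire (4 + 4 + 8 + 8 + 8), so somewhere around 45 PMD
threads would already fill a 1500-byte sample. If the per-thread array
stayed internal, the summary above could be filled in from the stats
the patch already collects -- an untested sketch, where
summarize_pmd_stats() and the dpif_pmd_stats_summary name are mine,
renamed to avoid clashing with the per-thread struct dpif_pmd_stats in
the patch:

/* Sketch: collapse the per-PMD counters collected by
 * dpif_netdev_get_pmd_stats() into a single summary record. */
struct dpif_pmd_stats_summary {
    unsigned num_threads;        /* Number of PMD threads */
    uint64_t cycles_polling;     /* Sum of PMD thread polling cycles */
    uint64_t cycles_processing;  /* Sum of PMD thread processing cycles */
    uint64_t cycles_total;       /* Sum of all PMD thread cycles */
};

static void
summarize_pmd_stats(const struct dpif_pmd_stats *per_thread, unsigned n,
                    struct dpif_pmd_stats_summary *sum)
{
    unsigned i;

    memset(sum, 0, sizeof *sum);
    sum->num_threads = n;
    for (i = 0; i < n; i++) {
        sum->cycles_polling += per_thread[i].cycles_polling;
        sum->cycles_processing += per_thread[i].cycles_processing;
        sum->cycles_total += per_thread[i].cycles_total;
    }
}

The exported sFlow structure would then stay a fixed size no matter how
many PMD threads are configured.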
Neil
On Tue, Jul 5, 2016 at 5:26 AM, Robert Wojciechowicz
<robertx.wojciechowicz at intel.com> wrote:
> The utilization of OVS PMD threads has been identified
> as an important metric when managing large deployments.
> This patch exposes PMD utilization metrics via sFlow;
> the same metrics are also available through the ovs-appctl
> utility (command: dpif-netdev/pmd-stats-show).
>
> Signed-off-by: Robert Wojciechowicz <robertx.wojciechowicz at intel.com>
> ---
> lib/dpif-netdev.c | 38 ++++++++++++++++++++++++
> lib/dpif-netlink.c | 1 +
> lib/dpif-provider.h | 4 +++
> lib/dpif.c | 12 ++++++++
> lib/dpif.h | 10 +++++++
> lib/sflow.h | 20 ++++++++++++-
> lib/sflow_receiver.c | 19 ++++++++++++
> ofproto/ofproto-dpif-sflow.c | 70 +++++++++++++++++++++++++++++++++++++++++++-
> 8 files changed, 172 insertions(+), 2 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 119e169..48367ce 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1100,6 +1100,43 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
>      return 0;
>  }
>
> +/* Collect PMD and main thread statistics.
> + * Modeled after the appctl utility (pmd-stats-show command).
> + *
> + * XXX: Dynamically allocates array for threads, which should be
> + * freed by caller.
> + */
> +static int
> +dpif_netdev_get_pmd_stats(const struct dpif *dpif,
> +                          struct dpif_pmd_stats **stats)
> +{
> +    struct dp_netdev *dp = get_dp_netdev(dpif);
> +    int ithr = 0, icyc = 0;
> +    struct dp_netdev_pmd_thread *pmd;
> +
> +    size_t nthreads = cmap_count(&dp->poll_threads);
> +    struct dpif_pmd_stats* pmd_stats = xmalloc(nthreads * sizeof *pmd_stats);
> +    memset(pmd_stats, 0, nthreads * sizeof *pmd_stats);
> +
> +    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> +        uint64_t cycles[PMD_N_CYCLES];
> +
> +        pmd_stats[ithr].numa_id = pmd->numa_id;
> +        pmd_stats[ithr].core_id = pmd->core_id;
> +        for (icyc = 0; icyc < ARRAY_SIZE(cycles); icyc++) {
> +            atomic_read_relaxed(&pmd->cycles.n[icyc], &cycles[icyc]);
> +        }
> +        pmd_stats[ithr].cycles_polling = cycles[PMD_CYCLES_POLLING];
> +        pmd_stats[ithr].cycles_processing = cycles[PMD_CYCLES_PROCESSING];
> +        for (icyc = 0; icyc < ARRAY_SIZE(cycles); icyc++) {
> +            pmd_stats[ithr].cycles_total += cycles[icyc];
> +        }
> +        ++ithr;
> +    }
> +    *stats = pmd_stats;
> +    return nthreads;
> +}
> +
>  static void
>  dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
>  {
> @@ -4168,6 +4205,7 @@ const struct dpif_class dpif_netdev_class = {
>      dpif_netdev_run,
>      dpif_netdev_wait,
>      dpif_netdev_get_stats,
> +    dpif_netdev_get_pmd_stats,
>      dpif_netdev_port_add,
>      dpif_netdev_port_del,
>      dpif_netdev_port_query_by_number,
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 9bff3a8..e6a7e07 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -2348,6 +2348,7 @@ const struct dpif_class dpif_netlink_class = {
>      dpif_netlink_run,
>      NULL, /* wait */
>      dpif_netlink_get_stats,
> +    NULL,
>      dpif_netlink_port_add,
>      dpif_netlink_port_del,
>      dpif_netlink_port_query_by_number,
> diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
> index 25f4280..0768837 100644
> --- a/lib/dpif-provider.h
> +++ b/lib/dpif-provider.h
> @@ -155,6 +155,10 @@ struct dpif_class {
>      /* Retrieves statistics for 'dpif' into 'stats'. */
>      int (*get_stats)(const struct dpif *dpif, struct dpif_dp_stats *stats);
>
> +    /* Retrieves PMD statistics for 'dpif' into 'stats'. */
> +    int (*get_pmd_stats)(const struct dpif *dpif,
> +                         struct dpif_pmd_stats **stats);
> +
>      /* Adds 'netdev' as a new port in 'dpif'. If '*port_no' is not
>       * UINT32_MAX, attempts to use that as the port's port number.
>       *
> diff --git a/lib/dpif.c b/lib/dpif.c
> index c4f24c7..a3a126a 100644
> --- a/lib/dpif.c
> +++ b/lib/dpif.c
> @@ -503,6 +503,18 @@ dpif_get_dp_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
>      return error;
>  }
>
> +/* Retrieves PMD statistics for 'dpif' into 'stats'.
> + * Returns number of PMDs. */
> +int
> +dpif_get_pmd_stats(const struct dpif *dpif, struct dpif_pmd_stats **stats)
> +{
> +    int count = 0;
> +    if (dpif->dpif_class->get_pmd_stats) {
> +        count = dpif->dpif_class->get_pmd_stats(dpif, stats);
> +    }
> +    return count;
> +}
> +
>  const char *
>  dpif_port_open_type(const char *datapath_type, const char *port_type)
>  {
> diff --git a/lib/dpif.h b/lib/dpif.h
> index 6788301..05f669b 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -431,6 +431,15 @@ const char *dpif_type(const struct dpif *);
>
>  int dpif_delete(struct dpif *);
>
> +/* Statistics for PMD threads. */
> +struct dpif_pmd_stats {
> +    int numa_id; /* PMD thread numa id */
> +    unsigned core_id; /* PMD thread core id */
> +    uint64_t cycles_polling; /* Number of PMD thread polling cycles */
> +    uint64_t cycles_processing; /* Number of PMD thread processing cycles */
> +    uint64_t cycles_total; /* Number of all PMD thread cycles */
> +};
> +
>  /* Statistics for a dpif as a whole. */
>  struct dpif_dp_stats {
>      uint64_t n_hit; /* Number of flow table matches. */
> @@ -442,6 +451,7 @@ struct dpif_dp_stats {
>      uint32_t n_masks; /* Number of mega flow masks. */
>  };
>  int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);
> +int dpif_get_pmd_stats(const struct dpif *, struct dpif_pmd_stats **);
>
>
>  /* Port operations. */
> diff --git a/lib/sflow.h b/lib/sflow.h
> index 95bedd9..d55dd5b 100644
> --- a/lib/sflow.h
> +++ b/lib/sflow.h
> @@ -577,6 +577,22 @@ typedef struct _SFLOVSDP_counters {
>
>  #define SFL_CTR_OVSDP_XDR_SIZE 24
>
> +/* OVS PMD threads stats */
> +
> +typedef struct _SFLPMDStat {
> +    int numa_id;
> +    unsigned core_id;
> +    uint64_t cycles_polling;
> +    uint64_t cycles_processing;
> +    uint64_t cycles_total;
> +} SFLPMDStat;
> +
> +typedef struct _SFLOVSPMD_counters {
> +    int pmd_count;
> +    SFLPMDStat* pmd_stats;
> +} SFLOVSPMD_counters;
> +
> +
>  /* Counters data */
>
>  enum SFLCounters_type_tag {
> @@ -590,7 +606,8 @@ enum SFLCounters_type_tag {
>      SFLCOUNTERS_OPENFLOWPORT = 1004,
>      SFLCOUNTERS_PORTNAME = 1005,
>      SFLCOUNTERS_APP_RESOURCES = 2203,
> -    SFLCOUNTERS_OVSDP = 2207
> +    SFLCOUNTERS_OVSDP = 2207,
> +    SFLCOUNTERS_OVSPMD = 2208
>  };
>
>  typedef union _SFLCounters_type {
> @@ -604,6 +621,7 @@ typedef union _SFLCounters_type {
>      SFLPortName portName;
>      SFLAPPResources_counters appResources;
>      SFLOVSDP_counters ovsdp;
> +    SFLOVSPMD_counters ovspmd;
>  } SFLCounters_type;
>
>  typedef struct _SFLCounters_sample_element {
> diff --git a/lib/sflow_receiver.c b/lib/sflow_receiver.c
> index cde1359..948ae15 100644
> --- a/lib/sflow_receiver.c
> +++ b/lib/sflow_receiver.c
> @@ -655,6 +655,11 @@ static int computeCountersSampleSize(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
>          case SFLCOUNTERS_PORTNAME: elemSiz = stringEncodingLength(&elem->counterBlock.portName.portName); break;
>          case SFLCOUNTERS_APP_RESOURCES: elemSiz = SFL_CTR_APP_RESOURCES_XDR_SIZE; break;
>          case SFLCOUNTERS_OVSDP: elemSiz = SFL_CTR_OVSDP_XDR_SIZE; break;
> +        case SFLCOUNTERS_OVSPMD: elemSiz = \
> +            elem->counterBlock.ovspmd.pmd_count \
> +            * sizeof *elem->counterBlock.ovspmd.pmd_stats \
> +            + sizeof elem->counterBlock.ovspmd.pmd_count;
> +            break;
>          default:
>              sflError(receiver, "unexpected counters_tag");
>              return -1;
> @@ -675,6 +680,7 @@ static int computeCountersSampleSize(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
>  int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_TYPE *cs)
>  {
>      int packedSize;
> +    int i = 0;
>      if(cs == NULL) return -1;
>      // if the sample pkt is full enough so that this sample might put
>      // it over the limit, then we should send it now.
> @@ -795,6 +801,19 @@ int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
>              putNet32(receiver, elem->counterBlock.ovsdp.n_flows);
>              putNet32(receiver, elem->counterBlock.ovsdp.n_masks);
>              break;
> +        case SFLCOUNTERS_OVSPMD:
> +            putNet32(receiver, elem->counterBlock.ovspmd.pmd_count);
> +            for (i=0; i<elem->counterBlock.ovspmd.pmd_count; i++) {
> +                putNet32(receiver, elem->counterBlock.ovspmd.pmd_stats[i].numa_id);
> +                putNet32(receiver, elem->counterBlock.ovspmd.pmd_stats[i].core_id);
> +                putNet64(receiver,
> +                         elem->counterBlock.ovspmd.pmd_stats[i].cycles_polling);
> +                putNet64(receiver,
> +                         elem->counterBlock.ovspmd.pmd_stats[i].cycles_processing);
> +                putNet64(receiver,
> +                         elem->counterBlock.ovspmd.pmd_stats[i].cycles_total);
> +            }
> +            break;
>          default:
>              sflError(receiver, "unexpected counters_tag");
>              return -1;
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index fbc82b7..0699903 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -216,6 +216,40 @@ sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
>                          dp_totals->n_mask_hit += dp_stats.n_mask_hit;
>                          dp_totals->n_masks += dp_stats.n_masks;
>                      }
> +
> +                    dpif_close(dpif);
> +                }
> +            }
> +            sset_destroy(&names);
> +        }
> +    }
> +    sset_destroy(&types);
> +    return count;
> +}
> +
> +/* Call to get the PMD stats. Modeled after the appctl utility.
> + *
> + * Return number of PMDs found
> + */
> +static int
> +sflow_get_pmd_stats(struct dpif_sflow *ds OVS_UNUSED,
> +                    struct dpif_pmd_stats **stats)
> +{
> +    struct sset types;
> +    const char *type;
> +    int count = 0;
> +
> +    sset_init(&types);
> +    dp_enumerate_types(&types);
> +    SSET_FOR_EACH (type, &types) {
> +        struct sset names;
> +        const char *name;
> +        sset_init(&names);
> +        if (dp_enumerate_names(type, &names) == 0) {
> +            SSET_FOR_EACH (name, &names) {
> +                struct dpif *dpif;
> +                if (dpif_open(name, type, &dpif) == 0) {
> +                    count = dpif_get_pmd_stats(dpif, stats);
>                      dpif_close(dpif);
>                  }
>              }
> @@ -259,9 +293,12 @@ sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
>      OVS_REQUIRES(mutex)
>  {
>      struct dpif_sflow *ds = ds_;
> -    SFLCounters_sample_element dp_elem, res_elem;
> +    SFLCounters_sample_element dp_elem, res_elem, pmd_elem;
>      struct dpif_dp_stats dp_totals;
> +    struct dpif_pmd_stats *pmd_stats = NULL;
>      struct rusage usage;
> +    int i = 0;
> +    int count = 0;
>
>      if (!sflow_global_counters_subid_test(poller->agent->subId)) {
>          /* Another sub-agent is currently responsible for this. */
> @@ -280,6 +317,29 @@ sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
>          SFLADD_ELEMENT(cs, &dp_elem);
>      }
>
> +    /* PMD threads stats */
> +    count = sflow_get_pmd_stats(ds, &pmd_stats);
> +    if (count > 0) {
> +        memset(&pmd_elem, 0, sizeof pmd_elem);
> +        pmd_elem.tag = SFLCOUNTERS_OVSPMD;
> +        pmd_elem.counterBlock.ovspmd.pmd_count = count;
> +        pmd_elem.counterBlock.ovspmd.pmd_stats = \
> +            xmalloc(count * sizeof *pmd_elem.counterBlock.ovspmd.pmd_stats);
> +        for (i=0; i<count; i++) {
> +            pmd_elem.counterBlock.ovspmd.pmd_stats[i].numa_id = \
> +                pmd_stats[i].numa_id;
> +            pmd_elem.counterBlock.ovspmd.pmd_stats[i].core_id = \
> +                pmd_stats[i].core_id;
> +            pmd_elem.counterBlock.ovspmd.pmd_stats[i].cycles_polling = \
> +                pmd_stats[i].cycles_polling;
> +            pmd_elem.counterBlock.ovspmd.pmd_stats[i].cycles_processing = \
> +                pmd_stats[i].cycles_processing;
> +            pmd_elem.counterBlock.ovspmd.pmd_stats[i].cycles_total = \
> +                pmd_stats[i].cycles_total;
> +        }
> +        SFLADD_ELEMENT(cs, &pmd_elem);
> +    }
> +
>      /* resource usage */
>      getrusage(RUSAGE_SELF, &usage);
>      res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
> @@ -295,7 +355,15 @@ sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
>      SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
>
>      SFLADD_ELEMENT(cs, &res_elem);
> +
>      sfl_poller_writeCountersSample(poller, cs);
> +
> +    if (pmd_stats) {
> +        free(pmd_elem.counterBlock.ovspmd.pmd_stats);
> +        pmd_elem.counterBlock.ovspmd.pmd_stats = NULL;
> +        free(pmd_stats);
> +        pmd_stats = NULL;
> +    }
>  }
>
> static void
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev