[ovs-dev] [PATCH v5 3/3] dpif-netdev: Detection and logging of suspicious PMD iterations

O Mahony, Billy billy.o.mahony at intel.com
Tue Jan 9 16:25:13 UTC 2018


Hi Jan,

Thanks these patches are really very useful.

I haven't finished trying them out but thought you'd prefer to get initial comments earlier. I'll continue to try to them out and revert with any further other comments later.

Regards,
Billy.

> -----Original Message-----
> From: ovs-dev-bounces at openvswitch.org [mailto:ovs-dev-
> bounces at openvswitch.org] On Behalf Of Jan Scheurich
> Sent: Thursday, January 4, 2018 12:08 PM
> To: dev at openvswitch.org
> Subject: [ovs-dev] [PATCH v5 3/3] dpif-netdev: Detection and logging of
> suspicious PMD iterations
> 
> This patch enhances dpif-netdev-perf to detect iterations with suspicious
> statistics according to the following criteria:
> 
> - iteration lasts longer than US_THR microseconds (default 250).
>   This can be used to capture events where a PMD is blocked or
>   interrupted for such a period of time that there is a risk for
>   dropped packets on any of its Rx queues.
> 
> - max vhost qlen exceeds a threshold Q_THR (default 128). This can
>   be used to infer virtio queue overruns and dropped packets inside
>   a VM, which are not visible in OVS otherwise.
> 
> Such suspicious iterations can be logged together with their iteration
> statistics to be able to correlate them to packet drop or other events outside
> OVS.
> 
> A new command is introduced to enable/disable logging at run-time and to
> adjust the above thresholds for suspicious iterations:
> 
> ovs-appctl dpif-netdev/pmd-perf-log-set [on|off]
>     [-b before] [-a after] [-us usec] [-q qlen]
> 
> Turn logging on or off at run-time (on|off).
> 
> -b before:  The number of iterations before the suspicious iteration to
>             be logged (default 5).
> -a after:   The number of iterations after the suspicious iteration to
>             be logged (default 5).
> -q qlen:    Suspicious vhost queue fill level threshold. Increase this
>             to 512 if the Qemu supports 1024 virtio queue length.
>             (default 128).
> -us usec:   change the duration threshold for a suspicious iteration
>             (default 250 us).
> 
> If more than 100 iterations before or after a suspicious iteration have been
> looged once, OVS falls back to the safe default values (5/5) to avoid that
[[BO'M]] typo
> logging itself causes continuos further logging.
[[BO'M]] typo
> 
> Signed-off-by: Jan Scheurich <jan.scheurich at ericsson.com>
> ---
>  lib/dpif-netdev-perf.c | 142
> +++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/dpif-netdev-perf.h |  32 +++++++++++
>  lib/dpif-netdev.c      |   5 ++
>  3 files changed, 179 insertions(+)
> 
> diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c index
> a66a48c..3fd19b0 100644
> --- a/lib/dpif-netdev-perf.c
> +++ b/lib/dpif-netdev-perf.c
> @@ -28,6 +28,23 @@
> 
>  VLOG_DEFINE_THIS_MODULE(pmd_perf);
> 
> +#define ITER_US_THRESHOLD 250   /* Warning threshold for iteration
> duration
> +                                   in microseconds. */
> +#define VHOST_QUEUE_FULL 128    /* Size of the virtio TX queue. */
> +#define LOG_IT_BEFORE 5         /* Number of iteration to log before
> +                                   suspicious iteration. */
> +#define LOG_IT_AFTER 5          /* Number of iteration to log after
> +                                   suspicious iteration. */
> +
> +bool log_on = false;
> +static uint32_t log_it_before = LOG_IT_BEFORE; static uint32_t
> +log_it_after = LOG_IT_AFTER; static uint32_t log_us_thr =
> +ITER_US_THRESHOLD; uint32_t log_q_thr = VHOST_QUEUE_FULL; uint64_t
> +iter_cycle_threshold;
> +
> +static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600,
> +600);
> +
>  #ifdef DPDK_NETDEV
>  static uint64_t
>  get_tsc_hz(void)
> @@ -133,6 +150,8 @@ pmd_perf_stats_init(struct pmd_perf_stats *s) {
>      histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000);
>      histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
>      s->start_ms = time_msec();
> +    s->log_begin_it = UINT64_MAX;
> +    s->log_end_it = UINT64_MAX;
>  }
> 
>  void
> @@ -368,6 +387,129 @@ pmd_perf_stats_clear(struct pmd_perf_stats *s)
>      histogram_clear(&s->max_vhost_qfill);
>      history_init(&s->iterations);
>      history_init(&s->milliseconds);
> +    s->log_begin_it = UINT64_MAX;
> +    s->log_end_it = UINT64_MAX;
>      s->start_ms = time_msec(); /* Clearing finished. */
>      s->milliseconds.sample[0].timestamp = s->start_ms;  }
> +
> +void
> +pmd_perf_log_suspicious_iteration(struct pmd_perf_stats *s,
> +                                     uint64_t cycles,
> +                                     char *reason) {
> +    VLOG_WARN_RL(&latency_rl,
> +                 "Suspicious iteration (%s): tsc=%"PRIu64
> +                 " duration=%"PRIu64" us\n",
> +                 reason, s->current.timestamp,
> +                 (1000000L * cycles) / get_tsc_hz());
[[BO'M]] get_tsc_hz calls sleep(1) so won't this block the datapath when logging is turned on? I haven't tested trying to inject occasional long processing interval.


> +    if (log_it_before + log_it_after > 0) {
> +        if (s->log_begin_it == UINT64_MAX) {
> +            s->log_begin_it =
> +                    (s->iterations.idx + HISTORY_LEN - log_it_before)
> +                        % HISTORY_LEN;
> +        }
> +        s->log_end_it =
> +                (s->iterations.idx + log_it_after) % HISTORY_LEN;
> +    }
> +}
> +
> +void
> +pmd_perf_log_iteration_neighborhood(struct pmd_perf_stats *s) {
> +    struct ds log = DS_EMPTY_INITIALIZER;
> +    pmd_perf_format_iteration_history(&log, s,
> +            (s->log_end_it + HISTORY_LEN - s->log_begin_it) % HISTORY_LEN);
> +    VLOG_WARN_RL(&latency_rl,
> +                 "Neighborhood of suspicious iteration:\n"
> +                 "%s", ds_cstr(&log));
> +    ds_destroy(&log);
> +    s->log_end_it = s->log_begin_it = UINT64_MAX;
> +    if (log_it_before > 100 || log_it_after > 100) {
> +        /* Reset to safe default values to avoid disturbance. */
> +        log_it_before = LOG_IT_BEFORE;
> +        log_it_after = LOG_IT_AFTER;
> +    }
> +}
> +
> +void
> +pmd_perf_log_set_cmd(struct unixctl_conn *conn,
> +                 int argc, const char *argv[],
> +                 void *aux OVS_UNUSED)
> +{
> +    int it_before, it_after, us_thr, q_thr;
> +    bool on;
> +    bool usage = false;
> +
> +    on = log_on;
> +    it_before = log_it_before;
> +    it_after = log_it_after;
> +    q_thr = log_q_thr;
> +    us_thr = log_us_thr;
> +
> +    while (argc > 1) {
> +        if (!strcmp(argv[1], "on")) {
> +            on = true;
> +            argc--;
> +            argv++;
> +        } else if (!strcmp(argv[1], "off")) {
> +            on = false;
> +            argc--;
> +            argv++;
> +        } else if (!strcmp(argv[1], "-a")) {
> +            it_after = strtol(argv[2], NULL, 10);
> +            if (it_after < 0) {
> +                it_after = 0;
> +            } else if (it_before + it_after > HISTORY_LEN-1) {
> +                it_after = HISTORY_LEN-1 - it_before;
> +            }
> +            argc -= 2;
> +            argv += 2;
> +        } else if (!strcmp(argv[1], "-b")) {
> +            it_before = strtol(argv[2], NULL, 10);
> +            if (it_before < 0) {
> +                it_before = 0;
> +            } else if (it_before > HISTORY_LEN-10) {
> +                it_before = HISTORY_LEN-10;
> +            }
> +            argc -= 2;
> +            argv += 2;
> +        } else if (!strcmp(argv[1], "-q")) {
> +            q_thr = strtol(argv[2], NULL, 10);
> +            if (q_thr < 0) {
> +                q_thr = 0;
> +            }
> +            argc -= 2;
> +            argv += 2;
> +        } else if (!strcmp(argv[1], "-us")) {
> +            us_thr = strtol(argv[2], NULL, 10);
> +            if (us_thr < 0) {
> +                us_thr = 0;
> +            }
> +            argc -= 2;
> +            argv += 2;
> +        } else {
> +            usage = true;
> +            break;
> +        }
> +    }
> +
> +    if (usage) {
> +        unixctl_command_reply_error(conn,
> +                "Usage: ovs-appctl dpif-netdev/pmd-perf-log-set "
> +                "[on|off] [-b before] [-a after] [-us usec] [-q qlen]");
> +        return;
> +    }
> +
> +    VLOG_INFO("pmd-perf-log-set: %s, before=%d, after=%d, us_thr=%d,"
> +              " q_thr=%d\n",
> +              on ? "on" : "off", it_before, it_after, us_thr, q_thr);
> +    log_on = on;
> +    log_it_before = it_before;
> +    log_it_after = it_after;
> +    log_q_thr = q_thr;
> +    log_us_thr = us_thr;
> +    iter_cycle_threshold = (log_us_thr * get_tsc_hz()) / 1000000L;
> +
> +    unixctl_command_reply(conn, "");
> +}
> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h index
> 3611723..392ca80 100644
> --- a/lib/dpif-netdev-perf.h
> +++ b/lib/dpif-netdev-perf.h
> @@ -109,6 +109,8 @@ struct pmd_perf_stats {
>      struct histogram max_vhost_qfill;
>      struct history iterations;
>      struct history milliseconds;
> +    uint64_t log_begin_it;
> +    uint64_t log_end_it;
>  };
> 
>  enum pmd_info_type;
> @@ -121,6 +123,10 @@ struct pmd_perf_params {
>      size_t ms_hist_len;
>  };
> 
> +extern bool log_on;
> +extern uint32_t log_q_thr;
> +extern uint64_t iter_cycle_threshold;
> +
>  void pmd_perf_stats_init(struct pmd_perf_stats *s);  void
> pmd_perf_stats_clear(struct pmd_perf_stats *s);  void
> pmd_perf_read_counters(struct pmd_perf_stats *s, @@ -145,9 +151,17
> @@ void pmd_perf_format_iteration_history(struct ds *str,  void
> pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
>                                  int n_ms);
> 
> +void pmd_perf_log_suspicious_iteration(struct pmd_perf_stats *s,
> +                                       uint64_t cycles,
> +                                       char *reason); void
> +pmd_perf_log_iteration_neighborhood(struct pmd_perf_stats *s);
>  void pmd_perf_show(struct unixctl_conn *conn, int argc,
>                     const char *argv[],
>                     void *aux OVS_UNUSED);
> +void pmd_perf_log_set_cmd(struct unixctl_conn *conn,
> +                      int argc, const char *argv[],
> +                      void *aux OVS_UNUSED);
> +
>  static inline void
>  histogram_add_sample(struct histogram *hist, uint32_t val)  { @@ -210,6
> +224,7 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t
> now_tsc,  {
>      struct iter_stats *cum_ms;
>      uint64_t cycles, cycles_per_pkt = 0;
> +    char *reason = NULL;
> 
>      if (OVS_UNLIKELY(s->current.timestamp == 0)) {
>          /* Stats were cleared during the ongoing iteration. */ @@ -263,6 +278,23
> @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, uint64_t now_tsc,
>      cum_ms->batches += s->current.batches;
>      cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;
> 
> +    if (log_on) {
> +        /* Log suspicious iterations. */
> +        if (cycles > iter_cycle_threshold) {
> +            reason = "Excessive total cycles";
> +        } else if (s->current.max_vhost_qfill >= log_q_thr) {
> +            reason = "Vhost RX queue full";
> +        }
> +        if (OVS_UNLIKELY(reason)) {
> +            pmd_perf_log_suspicious_iteration(s, cycles, reason);
> +        }
> +
> +        /* Log iteration stats interval when required. */
> +        if (OVS_UNLIKELY(s->log_end_it == s->iterations.idx)) {
> +            pmd_perf_log_iteration_neighborhood(s);
> +        }
> +    }
> +
>      /* Store in iteration history. */
>      history_store(&s->iterations, &s->current);
>      if (now_tsc - s->last_tsc > 10000) { diff --git a/lib/dpif-netdev.c b/lib/dpif-
> netdev.c index 8f64df3..96cc4d5 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1201,6 +1201,11 @@ dpif_netdev_init(void)
>      unixctl_command_register("dpif-netdev/pmd-rxq-rebalance", "[dp]",
>                               0, 1, dpif_netdev_pmd_rebalance,
>                               NULL);
> +    unixctl_command_register("dpif-netdev/pmd-perf-log-set",
> +                             "[on|off] [-b before] [-a after] "
> +                             "[-us usec] [-q qlen]",
> +                             0, 10, pmd_perf_log_set_cmd,
> +                             NULL);
>      return 0;
>  }
> 
> --
> 1.9.1
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list