[ovs-dev] [dpdk patch 5/8] dpif-netdev: Create 'number of dpdk ifaces on cpu socket' pmd threads for each cpu socket.

Pravin Shelar pshelar at nicira.com
Sat Aug 30 19:02:34 UTC 2014


On Mon, Aug 11, 2014 at 9:56 PM, Alex Wang <alexw at nicira.com> wrote:
> The pmd threads are pinned to available cpu cores on the
> corresponding cpu socket.  Note, core 0 is not pinnable
> and is reserved for all non-pmd threads.
>
> Signed-off-by: Alex Wang <alexw at nicira.com>
> ---
>  lib/dpif-netdev.c |  254 +++++++++++++++++++++++++++++++++++++++++------------
>  lib/dpif-netdev.h |    2 +-
>  lib/netdev-dpdk.c |   40 ++++++---
>  lib/netdev-dpdk.h |   15 ++++
>  4 files changed, 244 insertions(+), 67 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c637d9f..14784bf 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -52,6 +52,7 @@
>  #include "odp-util.h"
>  #include "ofp-print.h"
>  #include "ofpbuf.h"
> +#include "ovs-numa.h"
>  #include "ovs-rcu.h"
>  #include "packet-dpif.h"
>  #include "packets.h"
> @@ -71,6 +72,7 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
>  #define NETDEV_RULE_PRIORITY 0x8000
>
>  #define FLOW_DUMP_MAX_BATCH 50
> +
>  /* Use per thread recirc_depth to prevent recirculation loop. */
>  #define MAX_RECIRC_DEPTH 5
>  DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
> @@ -142,11 +144,9 @@ struct dp_netdev {
>      struct fat_rwlock upcall_rwlock;
>      exec_upcall_cb *upcall_cb;  /* Callback function for executing upcalls. */
>
> -    /* Forwarding threads. */
> -    struct latch exit_latch;
> -    struct pmd_thread *pmd_threads;
> -    size_t n_pmd_threads;
> -    int pmd_count;
> +    /* Per-cpu-socket struct for configuring pmd threads. */
> +    struct pmd_socket *pmd_sockets;
> +    int n_pmd_sockets;
>  };
>
>  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
> @@ -281,6 +281,15 @@ struct dp_netdev_actions *dp_netdev_flow_get_actions(
>      const struct dp_netdev_flow *);
>  static void dp_netdev_actions_free(struct dp_netdev_actions *);
>
> +/* Represents the PMD configuration on a cpu socket. */
> +struct pmd_socket {
> +    struct dp_netdev *dp;
> +    struct latch exit_latch;
> +    struct pmd_thread *pmd_threads;
> +    int socket_id;
> +    int n_pmd_threads;
> +};
> +
We should keep the socket-to-core mapping in the numa module rather than in
dpif-netdev.  I am not sure why the exit latch needs to be per socket; it is
global today, and that should be OK for now, no?

>  /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
>   * the performance overhead of interrupt processing.  Therefore netdev can
>   * not implement rx-wait for these devices.  dpif-netdev needs to poll
> @@ -293,9 +302,10 @@ static void dp_netdev_actions_free(struct dp_netdev_actions *);
>   * table, and executes the actions it finds.
>   **/
>  struct pmd_thread {
> -    struct dp_netdev *dp;
> +    struct pmd_socket *socket;
>      pthread_t thread;
> -    int id;
> +    int index;
> +    int core_id;
>      atomic_uint change_seq;
>  };
>
....

>  static void *
>  pmd_thread_main(void *f_)
>  {
>      struct pmd_thread *f = f_;
> -    struct dp_netdev *dp = f->dp;
> +    struct dp_netdev *dp = f->socket->dp;
>      unsigned int lc = 0;
>      struct rxq_poll *poll_list;
> +    struct non_local_pmd_dev *dev_list;
>      unsigned int port_seq;
> -    int poll_cnt;
> +    int poll_cnt, dev_cnt;
>      int i;
>
>      poll_cnt = 0;
> +    dev_cnt = 0;
>      poll_list = NULL;
> +    dev_list = NULL;
>
> -    pmd_thread_setaffinity_cpu(f->id);
> +    pmd_thread_setaffinity_cpu(f->core_id);
>  reload:
>      poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt);
> +    dev_cnt = pmd_get_non_local_pmd_dev(f, &dev_list, dev_cnt);
>      atomic_read(&f->change_seq, &port_seq);
>
>      for (;;) {
> @@ -1682,6 +1777,10 @@ reload:
>              dp_netdev_process_rxq_port(dp,  poll_list[i].port, poll_list[i].rx);
>          }
>
> +        for (i = 0; i < dev_cnt; i++) {
> +            netdev_dpdk_flush_non_local(dev_list[i].dev, f->core_id);
> +        }
> +

In the transmit function we can flush if this is a remote queue. To make the
remote-queue check cheap on every xmit, we can add a 'remote' flag to
the dpdk-netdev queue.



More information about the dev mailing list