[ovs-dev] [PATCH v7 5/6] dpif-netdev: do hw flow offload in a thread

Stokes, Ian ian.stokes at intel.com
Thu Mar 15 11:56:56 UTC 2018



> -----Original Message-----
> From: Stokes, Ian
> Sent: Monday, March 12, 2018 3:54 PM
> To: Yuanhan Liu <yliu at fridaylinux.org>; dev at openvswitch.org
> Cc: Finn Christensen <fc at napatech.com>; Darrell Ball <dball at vmware.com>;
> Chandran, Sugesh <sugesh.chandran at intel.com>; Simon Horman
> <simon.horman at netronome.com>
> Subject: RE: [PATCH v7 5/6] dpif-netdev: do hw flow offload in a thread
> 

Adding Shahaf Shuler
> > -----Original Message-----
> > From: Yuanhan Liu [mailto:yliu at fridaylinux.org]
> > Sent: Monday, January 29, 2018 7:00 AM
> > To: dev at openvswitch.org
> > Cc: Stokes, Ian <ian.stokes at intel.com>; Finn Christensen
> > <fc at napatech.com>; Darrell Ball <dball at vmware.com>; Chandran, Sugesh
> > <sugesh.chandran at intel.com>; Simon Horman
> > <simon.horman at netronome.com>; Yuanhan Liu <yliu at fridaylinux.org>
> > Subject: [PATCH v7 5/6] dpif-netdev: do hw flow offload in a thread
> >
> > Currently, the major trigger for hw flow offload is at upcall
> > handling, which is actually in the datapath. Moreover, hw offload
> > installation and modification are not that lightweight, meaning that
> > if many flows are added or modified frequently, the datapath could
> > stall, which could result in packet loss.
> >
> > To diminish that, all those flow operations are recorded and appended
> > to a list. A dedicated thread is then introduced to process this list
> > (to do the real flow offload put/del operations). This keeps the
> > datapath as lightweight as possible.
> >
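Just to make the queue-plus-worker-thread pattern described above concrete,
here is a minimal standalone sketch. It is not taken from the patch: it uses
plain pthreads and a hand-rolled singly linked list instead of the OVS
ovs_list/ovs_mutex/ovs_thread_create wrappers, and names such as work_item
and queue_offload are purely illustrative.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct work_item {
    int op;                       /* e.g. add / modify / delete */
    struct work_item *next;
};

static struct work_item *queue_head;
static pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t queue_cond = PTHREAD_COND_INITIALIZER;

/* Datapath side: record the operation and return immediately, so the
 * packet processing loop never blocks on the (slow) offload call. */
static void
queue_offload(int op)
{
    struct work_item *item = malloc(sizeof *item);
    struct work_item **p;

    item->op = op;
    item->next = NULL;

    pthread_mutex_lock(&queue_mutex);
    for (p = &queue_head; *p; p = &(*p)->next) {
        continue;                 /* Append at the tail. */
    }
    *p = item;
    pthread_cond_signal(&queue_cond);
    pthread_mutex_unlock(&queue_mutex);
}

/* Offload thread: sleep until work arrives, then do the expensive
 * hardware flow put/del outside the datapath. */
static void *
offload_main(void *arg)
{
    (void) arg;

    for (;;) {
        struct work_item *item;

        pthread_mutex_lock(&queue_mutex);
        while (!queue_head) {
            pthread_cond_wait(&queue_cond, &queue_mutex);
        }
        item = queue_head;
        queue_head = item->next;
        pthread_mutex_unlock(&queue_mutex);

        printf("processing offload op %d\n", item->op);
        free(item);
    }
    return NULL;
}

int
main(void)
{
    pthread_t tid;

    pthread_create(&tid, NULL, offload_main, NULL);
    queue_offload(0);
    queue_offload(2);
    pthread_join(tid, NULL);      /* Never returns in this toy example. */
    return 0;
}

The point is just that the datapath side only holds the mutex long enough to
append an item and signal, while the potentially slow hardware flow put/del
runs in the worker thread.
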
> > Signed-off-by: Yuanhan Liu <yliu at fridaylinux.org>
> > ---
> >
> > v5: - removed an unused mutex lock
> > ---
> >  lib/dpif-netdev.c | 348 ++++++++++++++++++++++++++++++++++++++++--------------
> >  1 file changed, 258 insertions(+), 90 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index cb16e2e..b1f6973 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -1854,13 +1854,11 @@ struct flow_mark {
> >      struct cmap megaflow_to_mark;
> >      struct cmap mark_to_flow;
> >      struct id_pool *pool;
> > -    struct ovs_mutex mutex;
> >  };
> >
> >  static struct flow_mark flow_mark = {
> >      .megaflow_to_mark = CMAP_INITIALIZER,
> >      .mark_to_flow = CMAP_INITIALIZER,
> > -    .mutex = OVS_MUTEX_INITIALIZER,
> >  };
> >
> >  static uint32_t
> > @@ -2001,6 +1999,8 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd,
> >      return ret;
> >  }
> >
> > +static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
> > +                                  struct dp_netdev_flow *flow);
> 
> This seems out of place here; I'd suggest moving it towards the beginning
> of the file, along with the other function prototypes for dpif-netdev.
> 
> >  static void
> >  flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
> >  {
> > @@ -2008,7 +2008,7 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
> >
> >      CMAP_FOR_EACH (flow, mark_node, &flow_mark.mark_to_flow) {
> >          if (flow->pmd_id == pmd->core_id) {
> > -            mark_to_flow_disassociate(pmd, flow);
> > +            queue_netdev_flow_del(pmd, flow);
> >          }
> >      }
> >  }
> > @@ -2030,6 +2030,257 @@ mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
> >      return NULL;
> >  }
> >
> > +enum {
> > +    DP_NETDEV_FLOW_OFFLOAD_OP_ADD,
> > +    DP_NETDEV_FLOW_OFFLOAD_OP_MOD,
> > +    DP_NETDEV_FLOW_OFFLOAD_OP_DEL,
> > +};
> 
> Again, I'd prefer enums like this to be kept at the beginning of the file
> rather than interspersed through the functions.
> It probably warrants defining a specific HWOL section, either at the top
> of this file or, preferably, spun out into a separate HWOL module
> altogether, so as not to overload the dpif-netdev code.
> 
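For what it's worth, a split along those lines could look roughly like the
header below. The file name lib/dpif-netdev-offload.h, the enum tag and the
exact layout are only my illustration of the idea, not something this patch
or the tree currently has; the identifiers themselves are borrowed from the
patch.

/* Hypothetical lib/dpif-netdev-offload.h -- sketches how the HWOL enum and
 * entry points could live outside lib/dpif-netdev.c. */
#ifndef DPIF_NETDEV_OFFLOAD_H
#define DPIF_NETDEV_OFFLOAD_H 1

#include <stddef.h>

struct dp_netdev_pmd_thread;
struct dp_netdev_flow;
struct match;
struct nlattr;

enum dp_netdev_flow_offload_op {
    DP_NETDEV_FLOW_OFFLOAD_OP_ADD,
    DP_NETDEV_FLOW_OFFLOAD_OP_MOD,
    DP_NETDEV_FLOW_OFFLOAD_OP_DEL,
};

/* Queue a hardware offload request; a dedicated thread created on first
 * use performs the actual flow put/del against the netdev. */
void queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
                           struct dp_netdev_flow *flow, struct match *match,
                           const struct nlattr *actions, size_t actions_len);
void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
                           struct dp_netdev_flow *flow);

#endif /* dpif-netdev-offload.h */

dpif-netdev.c would then only call the two queue_* entry points, while the
offload item struct, the list/mutex/cond and the worker thread would live in
the new module.
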
> > +
> > +struct dp_flow_offload_item {
> > +    struct dp_netdev_pmd_thread *pmd;
> > +    struct dp_netdev_flow *flow;
> > +    int op;
> > +    struct match match;
> > +    struct nlattr *actions;
> > +    size_t actions_len;
> > +
> > +    struct ovs_list node;
> > +};
> > +
> > +struct dp_flow_offload {
> > +    struct ovs_mutex mutex;
> > +    struct ovs_list list;
> > +    pthread_cond_t cond;
> > +};
> > +
> > +static struct dp_flow_offload dp_flow_offload = {
> > +    .mutex = OVS_MUTEX_INITIALIZER,
> > +    .list  = OVS_LIST_INITIALIZER(&dp_flow_offload.list),
> > +};
> > +
> > +static struct ovsthread_once offload_thread_once
> > +    = OVSTHREAD_ONCE_INITIALIZER;
> > +
> > +static struct dp_flow_offload_item *
> > +dp_netdev_alloc_flow_offload(struct dp_netdev_pmd_thread *pmd,
> > +                             struct dp_netdev_flow *flow,
> > +                             int op)
> > +{
> > +    struct dp_flow_offload_item *offload;
> > +
> > +    offload = xzalloc(sizeof(*offload));
> > +    offload->pmd = pmd;
> > +    offload->flow = flow;
> > +    offload->op = op;
> > +
> > +    dp_netdev_flow_ref(flow);
> > +    dp_netdev_pmd_try_ref(pmd);
> > +
> > +    return offload;
> > +}
> > +
> > +static void
> > +dp_netdev_free_flow_offload(struct dp_flow_offload_item *offload)
> > +{
> > +    dp_netdev_pmd_unref(offload->pmd);
> > +    dp_netdev_flow_unref(offload->flow);
> > +
> > +    free(offload->actions);
> > +    free(offload);
> > +}
> > +
> > +static void
> > +dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
> > +{
> > +    ovs_mutex_lock(&dp_flow_offload.mutex);
> > +    ovs_list_push_back(&dp_flow_offload.list, &offload->node);
> > +    xpthread_cond_signal(&dp_flow_offload.cond);
> > +    ovs_mutex_unlock(&dp_flow_offload.mutex);
> > +}
> > +
> > +static int
> > +dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
> > +{
> > +    return mark_to_flow_disassociate(offload->pmd, offload->flow);
> > +}
> > +
> > +/*
> > + * There are two flow offload operations here: addition and modification.
> > + *
> > + * For flow addition, this function does:
> > + * - allocate a new flow mark id
> > + * - perform hardware flow offload
> > + * - associate the flow mark with flow and mega flow
> > + *
> > + * For flow modification, both flow mark and the associations are still
> > + * valid, thus only item 2 needed.
> > + */
> > +static int
> > +dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
> > +{
> > +    struct dp_netdev_port *port;
> > +    struct dp_netdev_pmd_thread *pmd = offload->pmd;
> > +    struct dp_netdev_flow *flow = offload->flow;
> > +    odp_port_t in_port = flow->flow.in_port.odp_port;
> > +    bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
> > +    struct offload_info info;
> > +    uint32_t mark;
> > +    int ret;
> > +
> > +    if (flow->dead) {
> > +        return -1;
> > +    }
> > +
> > +    if (modification) {
> > +        mark = flow->mark;
> > +        ovs_assert(mark != INVALID_FLOW_MARK);
> > +    } else {
> > +        /*
> > +         * If a mega flow has already been offloaded (from other PMD
> > +         * instances), do not offload it again.
> > +         */
> > +        mark = megaflow_to_mark_find(&flow->mega_ufid);
> > +        if (mark != INVALID_FLOW_MARK) {
> > +            VLOG_DBG("flow has already been offloaded with mark
> > + %u\n",
> > mark);
> > +            if (flow->mark != INVALID_FLOW_MARK) {
> > +                ovs_assert(flow->mark == mark);
> > +            } else {
> > +                mark_to_flow_associate(mark, flow);
> > +            }
> > +            return 0;
> > +        }
> > +
> > +        mark = flow_mark_alloc();
> > +        if (mark == INVALID_FLOW_MARK) {
> > +            VLOG_ERR("failed to allocate flow mark!\n");
> > +        }
> > +    }
> > +    info.flow_mark = mark;
> > +
> > +    ovs_mutex_lock(&pmd->dp->port_mutex);
> > +    port = dp_netdev_lookup_port(pmd->dp, in_port);
> > +    if (!port) {
> > +        ovs_mutex_unlock(&pmd->dp->port_mutex);
> > +        return -1;
> > +    }
> > +    ret = netdev_flow_put(port->netdev, &offload->match,
> > +                          CONST_CAST(struct nlattr *, offload->actions),
> > +                          offload->actions_len, &flow->mega_ufid, &info,
> > +                          NULL);
> > +    ovs_mutex_unlock(&pmd->dp->port_mutex);
> > +
> > +    if (ret) {
> > +        if (!modification) {
> > +            flow_mark_free(mark);
> > +        } else {
> > +            mark_to_flow_disassociate(pmd, flow);
> > +        }
> > +        return -1;
> > +    }
> > +
> > +    if (!modification) {
> > +        megaflow_to_mark_associate(&flow->mega_ufid, mark);
> > +        mark_to_flow_associate(mark, flow);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static void *
> > +dp_netdev_flow_offload_main(void *data OVS_UNUSED)
> > +{
> > +    struct dp_flow_offload_item *offload;
> > +    struct ovs_list *list;
> > +    const char *op;
> > +    int ret;
> > +
> > +    for (;;) {
> > +        ovs_mutex_lock(&dp_flow_offload.mutex);
> > +        if (ovs_list_is_empty(&dp_flow_offload.list)) {
> > +            ovsrcu_quiesce_start();
> > +            ovs_mutex_cond_wait(&dp_flow_offload.cond,
> > +                                &dp_flow_offload.mutex);
> > +        }
> > +        list = ovs_list_pop_front(&dp_flow_offload.list);
> > +        offload = CONTAINER_OF(list, struct dp_flow_offload_item, node);
> > +        ovs_mutex_unlock(&dp_flow_offload.mutex);
> > +
> > +        switch (offload->op) {
> > +        case DP_NETDEV_FLOW_OFFLOAD_OP_ADD:
> > +            op = "add";
> > +            ret = dp_netdev_flow_offload_put(offload);
> > +            break;
> > +        case DP_NETDEV_FLOW_OFFLOAD_OP_MOD:
> > +            op = "modify";
> > +            ret = dp_netdev_flow_offload_put(offload);
> > +            break;
> > +        case DP_NETDEV_FLOW_OFFLOAD_OP_DEL:
> > +            op = "delete";
> > +            ret = dp_netdev_flow_offload_del(offload);
> > +            break;
> > +        default:
> > +            OVS_NOT_REACHED();
> > +        }
> > +
> > +        VLOG_DBG("%s to %s netdev flow\n",
> > +                 ret == 0 ? "succeed" : "failed", op);
> > +        dp_netdev_free_flow_offload(offload);
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void
> > +queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
> > +                      struct dp_netdev_flow *flow)
> > +{
> > +    struct dp_flow_offload_item *offload;
> > +
> > +    if (ovsthread_once_start(&offload_thread_once)) {
> > +        xpthread_cond_init(&dp_flow_offload.cond, NULL);
> > +        ovs_thread_create("dp_netdev_flow_offload",
> > +                          dp_netdev_flow_offload_main, NULL);
> > +        ovsthread_once_done(&offload_thread_once);
> > +    }
> > +
> > +    offload = dp_netdev_alloc_flow_offload(pmd, flow,
> > +                                           DP_NETDEV_FLOW_OFFLOAD_OP_DEL);
> > +    dp_netdev_append_flow_offload(offload);
> > +}
> > +
> > +static void
> > +queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
> > +                      struct dp_netdev_flow *flow, struct match *match,
> > +                      const struct nlattr *actions, size_t actions_len)
> > +{
> > +    struct dp_flow_offload_item *offload;
> > +    int op;
> > +
> > +    if (!netdev_is_flow_api_enabled()) {
> > +        return;
> > +    }
> > +
> > +    if (ovsthread_once_start(&offload_thread_once)) {
> > +        xpthread_cond_init(&dp_flow_offload.cond, NULL);
> > +        ovs_thread_create("dp_netdev_flow_offload",
> > +                          dp_netdev_flow_offload_main, NULL);
> > +        ovsthread_once_done(&offload_thread_once);
> > +    }
> > +
> > +    if (flow->mark != INVALID_FLOW_MARK) {
> > +        op = DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
> > +    } else {
> > +        op = DP_NETDEV_FLOW_OFFLOAD_OP_ADD;
> > +    }
> > +    offload = dp_netdev_alloc_flow_offload(pmd, flow, op);
> > +    offload->match = *match;
> > +    offload->actions = xmalloc(actions_len);
> > +    memcpy(offload->actions, actions, actions_len);
> > +    offload->actions_len = actions_len;
> > +
> > +    dp_netdev_append_flow_offload(offload);
> > +}
> > +
> >  static void
> >  dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
> >                            struct dp_netdev_flow *flow)
> > @@ -2044,7 +2295,7 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
> >      dpcls_remove(cls, &flow->cr);
> >      cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
> >      if (flow->mark != INVALID_FLOW_MARK) {
> > -        mark_to_flow_disassociate(pmd, flow);
> > +        queue_netdev_flow_del(pmd, flow);
> >      }
> >      flow->dead = true;
> >
> > @@ -2625,88 +2876,6 @@ out:
> >      return error;
> >  }
> >
> > -/*
> > - * There are two flow offload operations here: addition and
> modification.
> > - *
> > - * For flow addition, this function does:
> > - * - allocate a new flow mark id
> > - * - perform hardware flow offload
> > - * - associate the flow mark with flow and mega flow
> > - *
> > - * For flow modification, both flow mark and the associations are still
> > - * valid, thus only item 2 needed.
> > - */
> > -static void
> > -try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, odp_port_t in_port,
> > -                    struct dp_netdev_flow *flow, struct match *match,
> > -                    const struct nlattr *actions, size_t actions_len)
> > -{
> > -    struct offload_info info;
> > -    struct dp_netdev_port *port;
> > -    bool modification = flow->mark != INVALID_FLOW_MARK;
> > -    const char *op = modification ? "modify" : "add";
> > -    uint32_t mark;
> > -    int ret;
> > -
> > -    ovs_mutex_lock(&flow_mark.mutex);
> > -
> > -    if (modification) {
> > -        mark = flow->mark;
> > -    } else {
> > -        if (!netdev_is_flow_api_enabled()) {
> > -            goto out;
> > -        }
> > -
> > -        /*
> > -         * If a mega flow has already been offloaded (from other PMD
> > -         * instances), do not offload it again.
> > -         */
> > -        mark = megaflow_to_mark_find(&flow->mega_ufid);
> > -        if (mark != INVALID_FLOW_MARK) {
> > -            VLOG_DBG("flow has already been offloaded with mark %u\n",
> > mark);
> > -            mark_to_flow_associate(mark, flow);
> > -            goto out;
> > -        }
> > -
> > -        mark = flow_mark_alloc();
> > -        if (mark == INVALID_FLOW_MARK) {
> > -            VLOG_ERR("failed to allocate flow mark!\n");
> > -            goto out;
> > -        }
> > -    }
> > -    info.flow_mark = mark;
> > -
> > -    ovs_mutex_lock(&pmd->dp->port_mutex);
> > -    port = dp_netdev_lookup_port(pmd->dp, in_port);
> > -    if (!port) {
> > -        ovs_mutex_unlock(&pmd->dp->port_mutex);
> > -        goto out;
> > -    }
> > -    ret = netdev_flow_put(port->netdev, match,
> > -                          CONST_CAST(struct nlattr *, actions),
> > -                          actions_len, &flow->mega_ufid, &info, NULL);
> > -    ovs_mutex_unlock(&pmd->dp->port_mutex);
> > -
> > -    if (ret) {
> > -        VLOG_ERR("failed to %s netdev flow with mark %u\n", op, mark);
> > -        if (!modification) {
> > -            flow_mark_free(mark);
> > -        } else {
> > -            mark_to_flow_disassociate(pmd, flow);
> > -        }
> > -        goto out;
> > -    }
> > -
> > -    if (!modification) {
> > -        megaflow_to_mark_associate(&flow->mega_ufid, mark);
> > -        mark_to_flow_associate(mark, flow);
> > -    }
> > -    VLOG_DBG("succeed to %s netdev flow with mark %u\n", op, mark);
> > -
> > -out:
> > -    ovs_mutex_unlock(&flow_mark.mutex);
> > -}
> > -
> >  static void
> >  dp_netdev_get_mega_ufid(const struct match *match, ovs_u128 *mega_ufid)
> >  {
> > @@ -2772,7 +2941,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
> >      cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node),
> >                  dp_netdev_flow_hash(&flow->ufid));
> >
> > -    try_netdev_flow_put(pmd, in_port, flow, match, actions, actions_len);
> > +    queue_netdev_flow_put(pmd, flow, match, actions, actions_len);
> >
> >      if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) {
> >          struct ds ds = DS_EMPTY_INITIALIZER;
> > @@ -2854,7 +3023,6 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
> >          if (put->flags & DPIF_FP_MODIFY) {
> >              struct dp_netdev_actions *new_actions;
> >              struct dp_netdev_actions *old_actions;
> > -            odp_port_t in_port = netdev_flow->flow.in_port.odp_port;
> >
> >              new_actions = dp_netdev_actions_create(put->actions,
> >                                                     put->actions_len);
> > @@ -2862,8 +3030,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
> >              old_actions = dp_netdev_flow_get_actions(netdev_flow);
> >              ovsrcu_set(&netdev_flow->actions, new_actions);
> >
> > -            try_netdev_flow_put(pmd, in_port, netdev_flow, match,
> > -                                put->actions, put->actions_len);
> > +            queue_netdev_flow_put(pmd, netdev_flow, match,
> > +                                  put->actions, put->actions_len);
> >
> >              if (stats) {
> >                  get_dpif_flow_stats(netdev_flow, stats);
> > --
> > 2.7.4


