[ovs-dev] [RFC PATCH 3/4] dpif-netdev: Skip encap action during datapath execution

Sriharsha Basavapatna sriharsha.basavapatna at broadcom.com
Wed May 13 09:34:40 UTC 2020


On Wed, May 6, 2020 at 5:56 PM Eli Britstein <elibr at mellanox.com> wrote:
>
>
> On 4/24/2020 11:23 AM, Sriharsha Basavapatna wrote:
> > In this patch we check if action processing (apart from OUTPUT action),
> > should be skipped for a given dp_netdev_flow. Specifically, we check if
> > the action is TNL_PUSH and if it has been offloaded to HW, then we do not
> > push the tunnel header in SW. The datapath only executes the OUTPUT action.
> > The packet will be encapsulated in HW during transmit.
> >
> > Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna at broadcom.com>
> > ---
> >   lib/dpif-netdev.c | 28 ++++++++++++++++++----------
> >   1 file changed, 18 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index a47230067..7fcc0b06d 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -799,7 +799,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
> >                                         bool should_steal,
> >                                         const struct flow *flow,
> >                                         const struct nlattr *actions,
> > -                                      size_t actions_len);
> > +                                      size_t actions_len,
> > +                                      const struct dp_netdev_flow *dp_flow);
> >   static void dp_netdev_input(struct dp_netdev_pmd_thread *,
> >                               struct dp_packet_batch *, odp_port_t port_no);
> >   static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
> > @@ -3986,7 +3987,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
> >       dp_packet_batch_init_packet(&pp, execute->packet);
> >       pp.do_not_steal = true;
> >       dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
> > -                              execute->actions, execute->actions_len);
> > +                              execute->actions, execute->actions_len, NULL);
> >       dp_netdev_pmd_flush_output_packets(pmd, true);
> >
> >       if (pmd->core_id == NON_PMD_CORE_ID) {
> > @@ -6614,7 +6615,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
> >       actions = dp_netdev_flow_get_actions(flow);
> >
> >       dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow,
> > -                              actions->actions, actions->size);
> > +                              actions->actions, actions->size, flow);
> >   }
> >
> >   static inline void
> > @@ -6922,7 +6923,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
> >        * we'll send the packet up twice. */
> >       dp_packet_batch_init_packet(&b, packet);
> >       dp_netdev_execute_actions(pmd, &b, true, &match.flow,
> > -                              actions->data, actions->size);
> > +                              actions->data, actions->size, NULL);
> >
> >       add_actions = put_actions->size ? put_actions : actions;
> >       if (OVS_LIKELY(error != ENOSPC)) {
> > @@ -7157,6 +7158,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
> >   struct dp_netdev_execute_aux {
> >       struct dp_netdev_pmd_thread *pmd;
> >       const struct flow *flow;
> > +    const void *dp_flow;    /* for partial action offload */
> Why void * and not struct dp_netdev_flow *?

[Harsha] Yes, it doesn't have to be a void *, I'll change it.
> >   };
> >
> >   static void
> > @@ -7301,7 +7303,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
> >       if (!error || error == ENOSPC) {
> >           dp_packet_batch_init_packet(&b, packet);
> >           dp_netdev_execute_actions(pmd, &b, should_steal, flow,
> > -                                  actions->data, actions->size);
> > +                                  actions->data, actions->size, NULL);
> >       } else if (should_steal) {
> >           dp_packet_delete(packet);
> >           COVERAGE_INC(datapath_drop_userspace_action_error);
> > @@ -7320,6 +7322,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> >       int type = nl_attr_type(a);
> >       struct tx_port *p;
> >       uint32_t packet_count, packets_dropped;
> > +    struct dp_netdev_flow *dp_flow = aux->dp_flow;
> >
> >       switch ((enum ovs_action_attr)type) {
> >       case OVS_ACTION_ATTR_OUTPUT:
> > @@ -7377,9 +7380,13 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> >           }
> >           dp_packet_batch_apply_cutlen(packets_);
> >           packet_count = dp_packet_batch_size(packets_);
> > -        if (push_tnl_action(pmd, a, packets_)) {
> > -            COVERAGE_ADD(datapath_drop_tunnel_push_error,
> > -                         packet_count);
> > +        /* Execute tnl_push action in SW, only if it is not offloaded
> > +         * as a partial action in HW. Otherwise, HW pushes the tunnel
> > +         * header during output processing. */
> > +        if (!dp_flow || !dp_flow->partial_actions_offloaded) {
> > +            if (push_tnl_action(pmd, a, packets_)) {
> > +                COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count);
> > +            }
>
> Few comments here:
>
> - If there are multiple egress encaps, this introduces a bug.

[Harsha] As explained in my response in Patch-2, we don't support
multiple egress encaps, it should get rejected at the time of
selecting the flow for offload.
>
> - This is not synced with the HW insertion rule (another thread). If the
> actual rule insertion occurs in the middle of the batch, this code won’t
> take place, and the later packets will be do double encap.

[Harsha] I agree; we need some synchronization between the threads to
avoid this. I'll add this in the next revision.
>
> - I've tried the patchset on my system. The egress flow failed (I didn't
> look into the failure reason) and it fell back to mark/rss. So, it was
> considered as a success and this SW encap is skipped, ending up with
> non-encapsulated packet on the wire.

[Harsha] I'm guessing that the egress partial offload request got
rejected by the pmd, maybe because it is expecting an output action
(RTE Port-ID action) ? But this being partial action offload, the
output action is pruned (see parse_flow_actions() in patch-2) before
offloading the flow. So, the pmd should be fixed to accept the offload
request.
The second part, that is, fall back to mark/rss on failure, is an
issue in the new partial offload code. Since this is an 'egress' flow,
we should not fall back to mark/rss. I'll fix this.



>
> >           }
> >           return;
> >
> > @@ -7667,9 +7674,10 @@ static void
> >   dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
> >                             struct dp_packet_batch *packets,
> >                             bool should_steal, const struct flow *flow,
> > -                          const struct nlattr *actions, size_t actions_len)
> > +                          const struct nlattr *actions, size_t actions_len,
> > +                          const struct dp_netdev_flow *dp_flow)
> >   {
> > -    struct dp_netdev_execute_aux aux = { pmd, flow };
> > +    struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow };
> >
> >       odp_execute_actions(&aux, packets, should_steal, actions,
> >                           actions_len, dp_execute_cb);


More information about the dev mailing list