[ovs-dev] [RFC v3 PATCH 3/5] dpif-netdev: Skip encap action during datapath execution

Sriharsha Basavapatna sriharsha.basavapatna at broadcom.com
Mon Jun 29 09:49:06 UTC 2020


On Tue, Jun 2, 2020 at 11:47 AM Eli Britstein <elibr at mellanox.com> wrote:
>
>
> On 6/1/2020 8:29 PM, Sriharsha Basavapatna wrote:
> > On Mon, Jun 1, 2020 at 9:18 PM Eli Britstein <elibr at mellanox.com> wrote:
> >>
> >> On 6/1/2020 6:15 PM, Sriharsha Basavapatna wrote:
> >>> On Mon, Jun 1, 2020 at 7:58 PM Eli Britstein <elibr at mellanox.com> wrote:
> >>>> On 5/28/2020 11:19 AM, Sriharsha Basavapatna wrote:
> >>>>> In this patch we check if action processing (apart from OUTPUT action),
> >>>>> should be skipped for a given dp_netdev_flow. Specifically, we check if
> >>>>> the action is TNL_PUSH and if it has been offloaded to HW, then we do not
> >>>>> push the tunnel header in SW. The datapath only executes the OUTPUT action.
> >>>>> The packet will be encapsulated in HW during transmit.
> >>>>>
> >>>>> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna at broadcom.com>
> >>>>> ---
> >>>>>     lib/dpif-netdev.c | 46 ++++++++++++++++++++++++++++++++++++----------
> >>>>>     1 file changed, 36 insertions(+), 10 deletions(-)
> >>>>>
> >>>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> >>>>> index 781b233f4..3e175c25e 100644
> >>>>> --- a/lib/dpif-netdev.c
> >>>>> +++ b/lib/dpif-netdev.c
> >>>>> @@ -112,6 +112,7 @@ COVERAGE_DEFINE(datapath_drop_recirc_error);
> >>>>>     COVERAGE_DEFINE(datapath_drop_invalid_port);
> >>>>>     COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
> >>>>>     COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
> >>>>> +COVERAGE_DEFINE(datapath_skip_tunnel_push);
> >>>>>
> >>>>>     /* Protects against changes to 'dp_netdevs'. */
> >>>>>     static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
> >>>>> @@ -538,6 +539,16 @@ struct dp_netdev_flow {
> >>>>>         bool dead;
> >>>>>         uint32_t mark;               /* Unique flow mark assigned to a flow */
> >>>>>
> >>>>> +    /* The next two members are used to support partial offloading of
> >>>>> +     * actions. The boolean flag tells if this flow has its actions partially
> >>>>> +     * offloaded. The egress port# tells if the action should be offloaded
> >>>>> +     * on the egress (output) port instead of the in-port for the flow. Note
> >>>>> +     * that we support flows with a single egress port action.
> >>>>> +     * (see MAX_ACTION_ATTRS for related comments).
> >>>>> +     */
> >>>>> +    bool partial_actions_offloaded;
> >>>>> +    odp_port_t  egress_offload_port;
> >>>>> +
> >>>>>         /* Statistics. */
> >>>>>         struct dp_netdev_flow_stats stats;
> >>>>>
> >>>>> @@ -791,7 +802,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
> >>>>>                                           bool should_steal,
> >>>>>                                           const struct flow *flow,
> >>>>>                                           const struct nlattr *actions,
> >>>>> -                                      size_t actions_len);
> >>>>> +                                      size_t actions_len,
> >>>>> +                                      const struct dp_netdev_flow *dp_flow);
> >>>>>     static void dp_netdev_input(struct dp_netdev_pmd_thread *,
> >>>>>                                 struct dp_packet_batch *, odp_port_t port_no);
> >>>>>     static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
> >>>>> @@ -3828,7 +3840,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
> >>>>>         dp_packet_batch_init_packet(&pp, execute->packet);
> >>>>>         pp.do_not_steal = true;
> >>>>>         dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
> >>>>> -                              execute->actions, execute->actions_len);
> >>>>> +                              execute->actions, execute->actions_len, NULL);
> >>>>>         dp_netdev_pmd_flush_output_packets(pmd, true);
> >>>>>
> >>>>>         if (pmd->core_id == NON_PMD_CORE_ID) {
> >>>>> @@ -6456,7 +6468,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
> >>>>>         actions = dp_netdev_flow_get_actions(flow);
> >>>>>
> >>>>>         dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow,
> >>>>> -                              actions->actions, actions->size);
> >>>>> +                              actions->actions, actions->size, flow);
> >>>>>     }
> >>>>>
> >>>>>     static inline void
> >>>>> @@ -6764,7 +6776,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
> >>>>>          * we'll send the packet up twice. */
> >>>>>         dp_packet_batch_init_packet(&b, packet);
> >>>>>         dp_netdev_execute_actions(pmd, &b, true, &match.flow,
> >>>>> -                              actions->data, actions->size);
> >>>>> +                              actions->data, actions->size, NULL);
> >>>>>
> >>>>>         add_actions = put_actions->size ? put_actions : actions;
> >>>>>         if (OVS_LIKELY(error != ENOSPC)) {
> >>>>> @@ -6999,6 +7011,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
> >>>>>     struct dp_netdev_execute_aux {
> >>>>>         struct dp_netdev_pmd_thread *pmd;
> >>>>>         const struct flow *flow;
> >>>>> +    const struct dp_netdev_flow *dp_flow;    /* for partial action offload */
> >>>>>     };
> >>>>>
> >>>>>     static void
> >>>>> @@ -7143,7 +7156,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
> >>>>>         if (!error || error == ENOSPC) {
> >>>>>             dp_packet_batch_init_packet(&b, packet);
> >>>>>             dp_netdev_execute_actions(pmd, &b, should_steal, flow,
> >>>>> -                                  actions->data, actions->size);
> >>>>> +                                  actions->data, actions->size, NULL);
> >>>>>         } else if (should_steal) {
> >>>>>             dp_packet_delete(packet);
> >>>>>             COVERAGE_INC(datapath_drop_userspace_action_error);
> >>>>> @@ -7162,6 +7175,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> >>>>>         int type = nl_attr_type(a);
> >>>>>         struct tx_port *p;
> >>>>>         uint32_t packet_count, packets_dropped;
> >>>>> +    struct dp_netdev_flow *dp_flow = aux->dp_flow;
> >>>>>
> >>>>>         switch ((enum ovs_action_attr)type) {
> >>>>>         case OVS_ACTION_ATTR_OUTPUT:
> >>>>> @@ -7219,9 +7233,20 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> >>>>>             }
> >>>>>             dp_packet_batch_apply_cutlen(packets_);
> >>>>>             packet_count = dp_packet_batch_size(packets_);
> >>>>> -        if (push_tnl_action(pmd, a, packets_)) {
> >>>>> -            COVERAGE_ADD(datapath_drop_tunnel_push_error,
> >>>>> -                         packet_count);
> >>>>> +        /* Execute tnl_push action in SW, if it is not offloaded as a partial
> >>>>> +         * action in HW. Otherwise, HW pushes the tunnel header during output
> >>>>> +         * processing. There's a small window here in which the offload thread
> >>>>> +         * offloads the flow, but the partial_actions_offloaded flag is still
> >>>>> +         * not set. In this case, as the packet is already encapsulated, it
> >>>>> +         * wouldn't match the offloaded flow and the action won't be executed
> >>>>> +         * in HW.
> >>>> I have created a scenario in which it would hit. Just set the inner
> >>>> properties to the same values as the outer. Of course it is not the
> >>>> common case, but it is just one example.
> >>>>
> >>>> Maybe add logic to not offload if the matches would also apply to
> >>>> the outer header.
> >>> The outer properties, specifically the outer MAC addresses, are
> >>> resolved by OVS and then added to the header to be pushed. So I'm not
> >>> sure how you created this scenario, if I'm understanding your comment
> >>> correctly. Can you please provide some details (example/commands that
> >>> you used, etc.) to hit the condition? More clarity on this would help
> >>> me add the additional logic to decline offload in this case.
> >> I've just manually set the properties of the inner packet to match my
> >> setup's outer headers. Regarding the suggestion of where to check,
> >> please see patch 5/5.
> > Outer headers can't be the same as the inner, unless it is some kind
> > of misconfiguration. This misconfiguration could occur in full-offload
> > and non-offload modes too, not just in partial offload mode, right?
> > It's like having duplicate addresses (address conflicts) in the
> > network. Wouldn't code to handle this condition look more confusing?
> > There are facilities in the networking infrastructure to detect/report
> > such conditions, and I feel the flow offload layer is not the right
> > place to handle such issues. What do you think?
>
> I think the outer and inner might be the same. There is no restriction
> against doing so, and it is not a wrong configuration. Those are
> separate networks. There is currently no restriction in OVS that
> prevents supporting it (SW, partial mark/rss, full offloads). This
> patchset should not introduce such a limitation, and should handle it.
I'm sorry for not being able to respond sooner. I tested this
configuration with a Broadcom NIC. There is no issue, for the same
reason mentioned in the comments above: even with identical inner and
outer headers, the packet is already encapsulated in SW, so it doesn't
match the offloaded rule and gets transmitted properly (no
double-encap).
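
For reference, the crux of the change is the guard added around the
TNL_PUSH case in dp_execute_cb(); condensed from the diff above (with
unchanged context elided), it is roughly:

    case OVS_ACTION_ATTR_TUNNEL_PUSH:
        ...
        dp_packet_batch_apply_cutlen(packets_);
        packet_count = dp_packet_batch_size(packets_);
        /* Push the tunnel header in SW only when the action has not
         * been partially offloaded; otherwise HW encapsulates the
         * packet on transmit. */
        if (!dp_flow || !dp_flow->partial_actions_offloaded) {
            if (push_tnl_action(pmd, a, packets_)) {
                COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count);
            }
        } else {
            COVERAGE_ADD(datapath_skip_tunnel_push, packet_count);
        }
        return;

So even when the inner header mirrors the outer, a packet that takes
the SW path is already encapsulated before it reaches the NIC, and
therefore does not match the offloaded (pre-encap) rule.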
Thanks,
-Harsha


>
> >
> > Thanks,
> > -Harsha
> >>> Thanks,
> >>> -Harsha
> >>>>> +         */
> >>>>> +        if (!dp_flow || !dp_flow->partial_actions_offloaded) {
> >>>>> +            if (push_tnl_action(pmd, a, packets_)) {
> >>>>> +                COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count);
> >>>>> +            }
> >>>>> +        } else {
> >>>>> +            COVERAGE_ADD(datapath_skip_tunnel_push, packet_count);
> >>>>>             }
> >>>>>             return;
> >>>>>
> >>>>> @@ -7509,9 +7534,10 @@ static void
> >>>>>     dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
> >>>>>                               struct dp_packet_batch *packets,
> >>>>>                               bool should_steal, const struct flow *flow,
> >>>>> -                          const struct nlattr *actions, size_t actions_len)
> >>>>> +                          const struct nlattr *actions, size_t actions_len,
> >>>>> +                          const struct dp_netdev_flow *dp_flow)
> >>>>>     {
> >>>>> -    struct dp_netdev_execute_aux aux = { pmd, flow };
> >>>>> +    struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow };
> >>>>>
> >>>>>         odp_execute_actions(&aux, packets, should_steal, actions,
> >>>>>                             actions_len, dp_execute_cb);

