[ovs-dev] [PATCH v7 3/6] netdev-dpdk: implement flow offload with rte flow

Wed Mar 21 14:39:15 UTC 2018

>-----Original Message-----
>From: Stokes, Ian <ian.stokes at intel.com>
>Sent: 21. marts 2018 14:38
>To: Finn Christensen <fc at napatech.com>; 'Yuanhan Liu'
><yliu at fridaylinux.org>; dev at openvswitch.org; Shahaf Shuler
><shahafs at mellanox.com>
>Cc: Darrell Ball <dball at vmware.com>; Chandran, Sugesh
><sugesh.chandran at intel.com>; Simon Horman
><simon.horman at netronome.com>
>Subject: RE: [PATCH v7 3/6] netdev-dpdk: implement flow offload with rte
>flow
>
>> -----Original Message-----
>> From: Finn Christensen [mailto:fc at napatech.com]
>> Sent: Friday, March 16, 2018 12:44 PM
>> To: Stokes, Ian <ian.stokes at intel.com>; 'Yuanhan Liu'
>> <yliu at fridaylinux.org>; dev at openvswitch.org; Shahaf Shuler
>> <shahafs at mellanox.com>
>> Cc: Darrell Ball <dball at vmware.com>; Chandran, Sugesh
>> <sugesh.chandran at intel.com>; Simon Horman
><simon.horman at netronome.com>
>> Subject: RE: [PATCH v7 3/6] netdev-dpdk: implement flow offload with
>> rte flow
>>
>> Hi Ian,
>>
>> Thanks for the review.
>> I'll fix the compilation issues and the comments you have below.
>> Please see my answers below.
>>
>> Thanks
>> Finn
>>
>> >-----Original Message-----
>> >From: Stokes, Ian <ian.stokes at intel.com>
>> >Sent: 12. marts 2018 16:00
>> >To: 'Yuanhan Liu' <yliu at fridaylinux.org>; dev at openvswitch.org
>> >Cc: Finn Christensen <fc at napatech.com>; Darrell Ball
>> ><dball at vmware.com>; Chandran, Sugesh <sugesh.chandran at intel.com>;
>> >Simon Horman <simon.horman at netronome.com>
>> >Subject: RE: [PATCH v7 3/6] netdev-dpdk: implement flow offload with
>> >rte flow
>> >
>> >> -----Original Message-----
>> >> From: Yuanhan Liu [mailto:yliu at fridaylinux.org]
>> >> Sent: Monday, January 29, 2018 7:00 AM
>> >> To: dev at openvswitch.org
>> >> Cc: Stokes, Ian <ian.stokes at intel.com>; Finn Christensen
>> >> <fc at napatech.com>; Darrell Ball <dball at vmware.com>; Chandran,
>> >> Sugesh <sugesh.chandran at intel.com>; Simon Horman
>> >> <simon.horman at netronome.com>; Yuanhan Liu <yliu at fridaylinux.org>
>> >> Subject: [PATCH v7 3/6] netdev-dpdk: implement flow offload with
>> >> rte flow
>> >>
>> >> From: Finn Christensen <fc at napatech.com>
>> >>
>> >> The basic yet the major part of this patch is to translate the "match"
>> >> to rte flow patterns. And then, we create a rte flow with MARK +
>> >> RSS actions. Afterwards, all packets match the flow will have the
>> >> mark id in the mbuf.
>> >>
>> >> The reason RSS is needed is, for most NICs, a MARK only action is
>> >> not allowed. It has to be used together with some other actions,
>> >> such as QUEUE, RSS, etc. However, QUEUE action can specify one
>> >> queue only, which may break the rss. Likely, RSS action is
>> >> currently the best we could
>> >now.
>> >> Thus, RSS action is choosen.
>> >>
>> >> For any unsupported flows, such as MPLS, -1 is returned, meaning
>> >> the flow offload is failed and then skipped.
>> >>
>> >> Co-authored-by: Yuanhan Liu <yliu at fridaylinux.org>
>> >> Signed-off-by: Finn Christensen <fc at napatech.com>
>> >> Signed-off-by: Yuanhan Liu <yliu at fridaylinux.org>
>> >
>> >Hi Finn,
>> >
>> >Thanks for working on this. There are compilation errors introduced
>> >by this commit, details below along with other comments.
>> >
>> >For completeness here is the link to the OVS Travis build with
>> >associated failures
>> >
>> >https://travis-ci.org/istokes/ovs/builds/352283122
>> >
>> >
>> >Thanks
>> >Ian
>> >
>> >> ---
>> >>
>> >> v7: - set the rss_conf for rss action to NULL, to workaround a mlx5
>> change
>> >>       in DPDK v17.11. Note that it will obey the rss settings
>> >> OVS-DPDK
>> has
>> >>       set in the beginning. Thus, nothing should be effected.
>> >>
>> >> ---
>> >>  lib/netdev-dpdk.c | 559
>> >> +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> >>  1 file changed, 558 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>> >> ac2e38e..4bd0503
>> >> 100644
>> >> --- a/lib/netdev-dpdk.c
>> >> +++ b/lib/netdev-dpdk.c
>> >> @@ -38,7 +38,9 @@
>> >>  #include <rte_pci.h>
>> >>  #include <rte_vhost.h>
>> >>  #include <rte_version.h>
>> >> +#include <rte_flow.h>
>> >>
>> >> +#include "cmap.h"
>> >>  #include "dirs.h"
>> >>  #include "dp-packet.h"
>> >>  #include "dpdk.h"
>> >> @@ -60,6 +62,7 @@
>> >>  #include "sset.h"
>> >>  #include "unaligned.h"
>> >>  #include "timeval.h"
>> >> +#include "uuid.h"
>> >>  #include "unixctl.h"
>> >>
>> >>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; @@ -3633,6
>+3636,560
>> >@@
>> >> unlock:
>> >>      return err;
>> >>  }
>> >>
>> >> +/*
>> >> + * A mapping from ufid to dpdk rte_flow.
>> >> + */
>> >> +static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;
>> >> +
>> >> +struct ufid_to_rte_flow_data {
>> >> +    struct cmap_node node;
>> >> +    ovs_u128 ufid;
>> >> +    struct rte_flow *rte_flow;
>> >> +};
>> >
>> >Might be cleaner to declare above along with the other structs/maps
>> >specific to netdev-dpdk.c towards the beginning of this file.
>>
>> Ok done.
>>
>> >
>> >> +
>> >> +/* Find rte_flow with @ufid */
>> >> +static struct rte_flow *
>> >> +ufid_to_rte_flow_find(const ovs_u128 *ufid) {
>> >> +    size_t hash = hash_bytes(ufid, sizeof(*ufid), 0);
>> >> +    struct ufid_to_rte_flow_data *data;
>> >> +
>> >> +    CMAP_FOR_EACH_WITH_HASH (data, node, hash,
>&ufid_to_rte_flow) {
>> >> +        if (ovs_u128_equals(*ufid, data->ufid)) {
>> >> +            return data->rte_flow;
>> >> +        }
>> >> +    }
>> >> +
>> >> +    return NULL;
>> >> +}
>> >> +
>> >> +static inline void
>> >> +ufid_to_rte_flow_associate(const ovs_u128 *ufid, struct rte_flow
>> >> +*rte_flow) {
>> >> +    size_t hash = hash_bytes(ufid, sizeof(*ufid), 0);
>> >> +    struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data));
>> >> +
>> >> +    /*
>> >> +     * We should not simply overwrite an existing rte flow.
>> >> +     * We should have deleted it first before re-adding it.
>> >> +     * Thus, if following assert triggers, something is wrong:
>> >> +     * the rte_flow is not destroyed.
>> >> +     */
>> >> +    ovs_assert(ufid_to_rte_flow_find(ufid) == NULL);
>> >> +
>> >> +    data->ufid = *ufid;
>> >> +    data->rte_flow = rte_flow;
>> >> +
>> >> +    cmap_insert(&ufid_to_rte_flow,
>> >> +                CONST_CAST(struct cmap_node *, &data->node),
>> >> + hash); }
>> >> +
>> >> +static inline void
>> >> +ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) {
>> >> +    size_t hash = hash_bytes(ufid, sizeof(*ufid), 0);
>> >> +    struct ufid_to_rte_flow_data *data;
>> >> +
>> >> +    CMAP_FOR_EACH_WITH_HASH (data, node, hash,
>&ufid_to_rte_flow) {
>> >> +        if (ovs_u128_equals(*ufid, data->ufid)) {
>> >> +            cmap_remove(&ufid_to_rte_flow,
>> >> +                        CONST_CAST(struct cmap_node *,
>> >> + &data->node),
>> >> hash);
>> >> +            free(data);
>> >> +            return;
>> >> +        }
>> >> +    }
>> >> +
>> >> +    VLOG_WARN("ufid "UUID_FMT" is not associated with an rte
>flow\n",
>> >> +              UUID_ARGS((struct uuid *)ufid)); }
>> >> +
>> >> +struct flow_patterns {
>> >> +    struct rte_flow_item *items;
>> >> +    int cnt;
>> >> +    int max;
>> >> +};
>> >> +
>> >> +struct flow_actions {
>> >> +    struct rte_flow_action *actions;
>> >> +    int cnt;
>> >> +    int max;
>> >> +};
>> >> +
>> >> +static void
>> >> +add_flow_pattern(struct flow_patterns *patterns, enum
>> >> +rte_flow_item_type
>> >> type,
>> >> +                 const void *spec, const void *mask) {
>> >> +    int cnt = patterns->cnt;
>> >> +
>> >> +    if (cnt == 0) {
>> >> +        patterns->max = 8;
>> >> +        patterns->items = xcalloc(patterns->max, sizeof(struct
>> >> rte_flow_item));
>> >> +    } else if (cnt == patterns->max) {
>> >> +        patterns->max *= 2;
>> >> +        patterns->items = xrealloc(patterns->items, patterns->max *
>> >> +                                   sizeof(struct rte_flow_item));
>> >> +    }
>> >
>> >Just a general query about max value above, so if cnt == 0 you set
>> >the max to 8. However if cnt is == max you then double the value of max.
>> >
>> >What is the purpose of max and what is it's limit? Maybe it becomes
>> >clearer later in the patch but at the moment it seems 8 is the
>> >default max however it can be higher, I just find the behavior a
>> >little
>> misleading.
>>
>> The rte flow pattern item part is allocated in one allocation and then
>> grows with xrealloc if needed. It initializes with 8 elements and then
>> grows by doubling the number of elements. This is done to avoid a
>> maximum on the number of items and also not allocate for each single
>> item element individually, basically to simplify the code.
>> There is no limit other than when xalloc fails and then ovs_abort is
>> called.
>> If the "max" wording is a bit misleading, we can change it. Maybe
>> "current_max" is better?
>
>Ah ok, that makes a bit more sense. 'current_max' would be fine with a small
>comment explaining that we want to avoid individual allocations.

OK, I'll do that.

>
>In the case where the xrealoc fails I can see why allocating on an individual
>babsis would be poor here.
>
>In the case where no more items can be allocated can we fall back to the
>previous number allocated and flag an error?

It uses xrealloc. And when it fails, it calls out_of_memory and finally abort.
These allocations are really small, and we will probably have a limited 
number of items, even usually below 8. Furthermore, if it fails that small an 
allocation I think it is OK to have xrealloc flag an out_of_memory. It makes 
the code simpler.
If you still think it is worth implementing code for failing a flow offload caused 
by an xrealloc failure, let me know.

>
>Ian
>>
>> >
>> >> +
>> >> +    patterns->items[cnt].type = type;
>> >> +    patterns->items[cnt].spec = spec;
>> >> +    patterns->items[cnt].mask = mask;
>> >> +    patterns->items[cnt].last = NULL;
>> >> +    patterns->cnt++;
>> >> +}
>> >> +
>> >> +static void
>> >> +add_flow_action(struct flow_actions *actions, enum
>> >> +rte_flow_action_type
>> >> type,
>> >> +                const void *conf)
>> >> +{
>> >> +    int cnt = actions->cnt;
>> >> +
>> >> +    if (cnt == 0) {
>> >> +        actions->max = 8;
>> >> +        actions->actions = xcalloc(actions->max,
>> >> +                                   sizeof(struct rte_flow_action));
>> >> +    } else if (cnt == actions->max) {
>> >> +        actions->max *= 2;
>> >> +        actions->actions = xrealloc(actions->actions, actions->max *
>> >> +                                    sizeof(struct rte_flow_action));
>> >> +    }
>> >> +
>> >> +    actions->actions[cnt].type = type;
>> >> +    actions->actions[cnt].conf = conf;
>> >> +    actions->cnt++;
>> >> +}
>> >> +
>> >> +static struct rte_flow_action_rss * add_flow_rss_action(struct
>> >> +flow_actions *actions, struct netdev
>> >> +*netdev) {
>> >> +    int i;
>> >> +    struct rte_flow_action_rss *rss;
>> >> +
>> >> +    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
>> >> +    /*
>> >> +     * Setting it to NULL will let the driver use the default RSS
>> >> +     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
>> >> +     */
>> >> +    rss->rss_conf = NULL;
>> >> +    rss->num = netdev->n_rxq;
>> >> +
>> >> +    for (i = 0; i < rss->num; i++) {
>> >> +        rss->queue[i] = i;
>> >> +    }
>> >> +
>> >> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
>> >> +
>> >> +    return rss;
>> >> +}
>> >> +
>> >> +static int
>> >> +netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
>> >> +                                 const struct match *match,
>> >> +                                 struct nlattr *nl_actions OVS_UNUSED,
>> >> +                                 size_t actions_len OVS_UNUSED,
>> >> +                                 const ovs_u128 *ufid,
>> >> +                                 struct offload_info *info) {
>> >> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> >> +    const struct rte_flow_attr flow_attr = {
>> >> +        .group = 0,
>> >> +        .priority = 0,
>> >> +        .ingress = 1,
>> >> +        .egress = 0
>> >> +    };
>> >> +    struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
>> >> +    struct flow_actions actions = { .actions = NULL, .cnt = 0 };
>> >> +    struct rte_flow *flow;
>> >> +    struct rte_flow_error error;
>> >> +    uint8_t *ipv4_next_proto_mask = NULL;
>> >> +    int ret = 0;
>> >> +
>> >> +    /* Eth */
>> >> +    struct rte_flow_item_eth eth_spec;
>> >> +    struct rte_flow_item_eth eth_mask;
>> >> +    memset(&eth_spec, 0, sizeof(eth_spec));
>> >> +    memset(&eth_mask, 0, sizeof(eth_mask));
>> >> +    if (!eth_addr_is_zero(match->wc.masks.dl_src) ||
>> >
>> >Above line will cause compilation error error: dereferencing pointer
>> >to incomplete type 'const struct match'
>>
>> OK. Modified after rebasing.
>>
>> >
>> >> +        !eth_addr_is_zero(match->wc.masks.dl_dst)) {
>> >> +        rte_memcpy(&eth_spec.dst, &match->flow.dl_dst,
>> >> sizeof(eth_spec.dst));
>> >> +        rte_memcpy(&eth_spec.src, &match->flow.dl_src,
>> >> sizeof(eth_spec.src));
>> >> +        eth_spec.type = match->flow.dl_type;
>> >> +
>> >> +        rte_memcpy(&eth_mask.dst, &match->wc.masks.dl_dst,
>> >> +                   sizeof(eth_mask.dst));
>> >> +        rte_memcpy(&eth_mask.src, &match->wc.masks.dl_src,
>> >> +                   sizeof(eth_mask.src));
>> >> +        eth_mask.type = match->wc.masks.dl_type;
>> >> +
>> >> +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH,
>> >> +                         &eth_spec, &eth_mask);
>> >> +    } else {
>> >> +        /*
>> >> +         * If user specifies a flow (like UDP flow) without L2
>> patterns,
>> >> +         * OVS will at least set the dl_type. Normally, it's enough to
>> >> +         * create an eth pattern just with it. Unluckily, some Intel's
>> >> +         * NIC (such as XL710) doesn't support that. Below is a
>> >> workaround,
>> >> +         * which simply matches any L2 pkts.
>> >> +         */
>> >> +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL,
>> >NULL);
>> >> +    }
>> >> +
>> >> +    /* VLAN */
>> >> +    struct rte_flow_item_vlan vlan_spec;
>> >> +    struct rte_flow_item_vlan vlan_mask;
>> >> +    memset(&vlan_spec, 0, sizeof(vlan_spec));
>> >> +    memset(&vlan_mask, 0, sizeof(vlan_mask));
>> >> +    if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
>> >> +        vlan_spec.tci  = match->flow.vlans[0].tci;
>> >> +        vlan_mask.tci  = match->wc.masks.vlans[0].tci;
>> >> +
>> >> +        /* match any protocols */
>> >> +        vlan_mask.tpid = 0;
>> >> +
>> >> +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
>> >> +                         &vlan_spec, &vlan_mask);
>> >> +    }
>> >> +
>> >> +    /* IP v4 */
>> >> +    uint8_t proto = 0;
>> >> +    struct rte_flow_item_ipv4 ipv4_spec;
>> >> +    struct rte_flow_item_ipv4 ipv4_mask;
>> >> +    memset(&ipv4_spec, 0, sizeof(ipv4_spec));
>> >> +    memset(&ipv4_mask, 0, sizeof(ipv4_mask));
>> >> +    if (match->flow.dl_type == ntohs(ETH_TYPE_IP) &&
>> >> +        (match->wc.masks.nw_src || match->wc.masks.nw_dst ||
>> >> +         match->wc.masks.nw_tos || match->wc.masks.nw_ttl ||
>> >> +         match->wc.masks.nw_proto)) {
>> >> +        ipv4_spec.hdr.type_of_service = match->flow.nw_tos;
>> >> +        ipv4_spec.hdr.time_to_live    = match->flow.nw_ttl;
>> >> +        ipv4_spec.hdr.next_proto_id   = match->flow.nw_proto;
>> >> +        ipv4_spec.hdr.src_addr        = match->flow.nw_src;
>> >> +        ipv4_spec.hdr.dst_addr        = match->flow.nw_dst;
>> >> +
>> >> +        ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos;
>> >> +        ipv4_mask.hdr.time_to_live    = match->wc.masks.nw_ttl;
>> >> +        ipv4_mask.hdr.next_proto_id   = match->wc.masks.nw_proto;
>> >> +        ipv4_mask.hdr.src_addr        = match->wc.masks.nw_src;
>> >> +        ipv4_mask.hdr.dst_addr        = match->wc.masks.nw_dst;
>> >> +
>> >> +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4,
>> >> +                         &ipv4_spec, &ipv4_mask);
>> >> +
>> >> +        /* Save proto for L4 protocol setup */
>> >> +        proto = ipv4_spec.hdr.next_proto_id &
>> >> + ipv4_mask.hdr.next_proto_id;
>> >> +
>> >> +        /* Remember proto mask address for later modification */
>> >> +        ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id;
>> >> +    }
>> >> +
>> >> +    if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP  &&
>> >> +        proto != IPPROTO_SCTP && proto != IPPROTO_TCP  &&
>> >> +        (match->wc.masks.tp_src ||
>> >> +         match->wc.masks.tp_dst ||
>> >> +         match->wc.masks.tcp_flags)) {
>> >> +        VLOG_DBG("L4 Protocol (%u) not supported", proto);
>> >> +        ret = -1;
>> >> +        goto out;
>> >> +    }
>> >> +
>> >> +    if ((match->wc.masks.tp_src && match->wc.masks.tp_src !=
>> >> + 0xffff)
>> ||
>> >> +        (match->wc.masks.tp_dst && match->wc.masks.tp_dst !=
>> >> + 0xffff))
>> {
>> >> +        ret = -1;
>> >> +        goto out;
>> >> +    }
>> >> +
>> >> +    struct rte_flow_item_udp udp_spec;
>> >> +    struct rte_flow_item_udp udp_mask;
>> >> +    memset(&udp_spec, 0, sizeof(udp_spec));
>> >> +    memset(&udp_mask, 0, sizeof(udp_mask));
>> >> +    if (proto == IPPROTO_UDP &&
>> >> +        (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
>> >> +        udp_spec.hdr.src_port = match->flow.tp_src;
>> >> +        udp_spec.hdr.dst_port = match->flow.tp_dst;
>> >> +
>> >> +        udp_mask.hdr.src_port = match->wc.masks.tp_src;
>> >> +        udp_mask.hdr.dst_port = match->wc.masks.tp_dst;
>> >> +
>> >> +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP,
>> >> +                         &udp_spec, &udp_mask);
>> >> +
>> >> +        /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto
>> >> + match
>> >> */
>> >> +        if (ipv4_next_proto_mask) {
>> >> +            *ipv4_next_proto_mask = 0;
>> >> +        }
>> >> +    }
>> >> +
>> >> +    struct rte_flow_item_sctp sctp_spec;
>> >> +    struct rte_flow_item_sctp sctp_mask;
>> >> +    memset(&sctp_spec, 0, sizeof(sctp_spec));
>> >> +    memset(&sctp_mask, 0, sizeof(sctp_mask));
>> >> +    if (proto == IPPROTO_SCTP &&
>> >> +        (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
>> >> +        sctp_spec.hdr.src_port = match->flow.tp_src;
>> >> +        sctp_spec.hdr.dst_port = match->flow.tp_dst;
>> >> +
>> >> +        sctp_mask.hdr.src_port = match->wc.masks.tp_src;
>> >> +        sctp_mask.hdr.dst_port = match->wc.masks.tp_dst;
>> >> +
>> >> +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP,
>> >> +                         &sctp_spec, &sctp_mask);
>> >> +
>> >> +        /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for
>> >> + proto match
>> >> */
>> >> +        if (ipv4_next_proto_mask) {
>> >> +            *ipv4_next_proto_mask = 0;
>> >> +        }
>> >> +    }
>> >> +
>> >> +    struct rte_flow_item_icmp icmp_spec;
>> >> +    struct rte_flow_item_icmp icmp_mask;
>> >> +    memset(&icmp_spec, 0, sizeof(icmp_spec));
>> >> +    memset(&icmp_mask, 0, sizeof(icmp_mask));
>> >> +    if (proto == IPPROTO_ICMP &&
>> >> +        (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
>> >> +        icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src);
>> >> +        icmp_spec.hdr.icmp_code =
>> >> + (uint8_t)ntohs(match->flow.tp_dst);
>> >> +
>> >> +        icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match-
>> >wc.masks.tp_src);
>> >> +        icmp_mask.hdr.icmp_code =
>> >> + (uint8_t)ntohs(match->wc.masks.tp_dst);
>> >> +
>> >> +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP,
>> >> +                         &icmp_spec, &icmp_mask);
>> >> +
>> >> +        /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for
>> >> + proto match
>> >> */
>> >> +        if (ipv4_next_proto_mask) {
>> >> +            *ipv4_next_proto_mask = 0;
>> >> +        }
>> >> +    }
>> >> +
>> >> +    struct rte_flow_item_tcp tcp_spec;
>> >> +    struct rte_flow_item_tcp tcp_mask;
>> >> +    memset(&tcp_spec, 0, sizeof(tcp_spec));
>> >> +    memset(&tcp_mask, 0, sizeof(tcp_mask));
>> >> +    if (proto == IPPROTO_TCP &&
>> >> +        (match->wc.masks.tp_src ||
>> >> +         match->wc.masks.tp_dst ||
>> >> +         match->wc.masks.tcp_flags)) {
>> >> +        tcp_spec.hdr.src_port  = match->flow.tp_src;
>> >> +        tcp_spec.hdr.dst_port  = match->flow.tp_dst;
>> >> +        tcp_spec.hdr.data_off  = ntohs(match->flow.tcp_flags) >> 8;
>> >> +        tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) &
>> >> + 0xff;
>> >> +
>> >> +        tcp_mask.hdr.src_port  = match->wc.masks.tp_src;
>> >> +        tcp_mask.hdr.dst_port  = match->wc.masks.tp_dst;
>> >> +        tcp_mask.hdr.data_off  = ntohs(match->wc.masks.tcp_flags)
>> >> + >>
>> 8;
>> >> +        tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags)
>> >> + & 0xff;
>> >> +
>> >> +        add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP,
>> >> +                         &tcp_spec, &tcp_mask);
>> >> +
>> >> +        /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto
>> >> + match
>> >> */
>> >> +        if (ipv4_next_proto_mask) {
>> >> +            *ipv4_next_proto_mask = 0;
>> >> +        }
>> >> +    }
>> >> +    add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL,
>> >NULL);
>> >> +
>> >> +    struct rte_flow_action_mark mark;
>> >> +    mark.id = info->flow_mark;
>> >> +    add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK,
>&mark);
>> >> +
>> >> +    struct rte_flow_action_rss *rss;
>> >> +    rss = add_flow_rss_action(&actions, netdev);
>> >> +    add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>> >> +
>> >> +    flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
>> >> +                           actions.actions, &error);
>> >> +    free(rss);
>> >> +    if (!flow) {
>> >> +        VLOG_ERR("rte flow creat error: %u : message : %s\n",
>> >> +                 error.type, error.message);
>> >> +        ret = -1;
>> >> +        goto out;
>> >> +    }
>> >> +    ufid_to_rte_flow_associate(ufid, flow);
>> >> +    VLOG_DBG("installed flow %p by ufid "UUID_FMT"\n",
>> >> +             flow, UUID_ARGS((struct uuid *)ufid));
>> >
>> >A general comment, right now for a matching flow this function checks
>> >all supported options. E.g. if the protocol is identified as UDP,
>> >this will still check for TCP etc.
>> >
>> >Could it be refactored similar to how miniflow extract parses info?
>> >i.e. check for most common protocols first, once a protocol is
>> >identified and relevant info extracted then jump to the end of the
>> >function instead of checking against remaining protocols?
>>
>> Yes, that makes sense.
>>
>> >
>> >> +
>> >> +out:
>> >> +    free(patterns.items);
>> >> +    free(actions.actions);
>> >> +    return ret;
>> >> +}
>> >> +
>> >> +static bool
>> >> +is_all_zero(const void *addr, size_t n) {
>> >> +    size_t i = 0;
>> >> +    const uint8_t *p = (uint8_t *)addr;
>> >> +
>> >> +    for (i = 0; i < n; i++) {
>> >> +        if (p[i] != 0) {
>> >> +            return false;
>> >> +        }
>> >> +    }
>> >> +
>> >> +    return true;
>> >> +}
>> >> +
>> >> +/*
>> >> + * Check if any unsupported flow patterns are specified.
>> >> + */
>> >> +static int
>> >> +netdev_dpdk_validate_flow(const struct match *match) {
>> >> +    struct match match_zero_wc;
>> >
>> >Causes compilation error error: storage size of 'match_zero_wc' isn't
>> known.
>> >Also gcc complains with unused variable 'match_zero_wc'.
>> >
>> >> +
>> >> +    /* Create a wc-zeroed version of flow */
>> >> +    match_init(&match_zero_wc, &match->flow, &match->wc);
>> >
>> >Compilation fails with implicit declaration of function 'match_init'.
>>
>> Same as above compilation error.
>> >
>> >> +
>> >> +    if (!is_all_zero(&match_zero_wc.flow.tunnel,
>> >> +                     sizeof(match_zero_wc.flow.tunnel))) {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    if (match->wc.masks.metadata ||
>> >> +        match->wc.masks.skb_priority ||
>> >> +        match->wc.masks.pkt_mark ||
>> >> +        match->wc.masks.dp_hash) {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    /* recirc id must be zero */
>> >> +    if (match_zero_wc.flow.recirc_id) {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    if (match->wc.masks.ct_state ||
>> >> +        match->wc.masks.ct_nw_proto ||
>> >> +        match->wc.masks.ct_zone ||
>> >> +        match->wc.masks.ct_mark ||
>> >> +        match->wc.masks.ct_label.u64.hi ||
>> >> +        match->wc.masks.ct_label.u64.lo) {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    if (match->wc.masks.conj_id ||
>> >> +        match->wc.masks.actset_output) {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    /* unsupported L2 */
>> >> +    if (!is_all_zero(&match->wc.masks.mpls_lse,
>> >> +                     sizeof(match_zero_wc.flow.mpls_lse))) {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    /* unsupported L3 */
>> >> +    if (match->wc.masks.ipv6_label ||
>> >> +        match->wc.masks.ct_nw_src ||
>> >> +        match->wc.masks.ct_nw_dst ||
>> >> +        !is_all_zero(&match->wc.masks.ipv6_src, sizeof(struct
>> >> + in6_addr))
>> >> ||
>> >> +        !is_all_zero(&match->wc.masks.ipv6_dst, sizeof(struct
>> >> + in6_addr))
>> >> ||
>> >> +        !is_all_zero(&match->wc.masks.ct_ipv6_src, sizeof(struct
>> >> in6_addr)) ||
>> >> +        !is_all_zero(&match->wc.masks.ct_ipv6_dst, sizeof(struct
>> >> in6_addr)) ||
>> >> +        !is_all_zero(&match->wc.masks.nd_target, sizeof(struct
>> >> + in6_addr))
>> >> ||
>> >> +        !is_all_zero(&match->wc.masks.nsh, sizeof(struct
>> >> + ovs_key_nsh))
>> ||
>> >> +        !is_all_zero(&match->wc.masks.arp_sha, sizeof(struct
>> >> + eth_addr))
>> >> ||
>> >> +        !is_all_zero(&match->wc.masks.arp_tha, sizeof(struct
>> >> + eth_addr)))
>> >> {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    /* If fragmented, then don't HW accelerate - for now */
>> >> +    if (match_zero_wc.flow.nw_frag) {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    /* unsupported L4 */
>> >> +    if (match->wc.masks.igmp_group_ip4 ||
>> >> +        match->wc.masks.ct_tp_src ||
>> >> +        match->wc.masks.ct_tp_dst) {
>> >> +        goto err;
>> >> +    }
>> >> +
>> >> +    return 0;
>> >> +
>> >> +err:
>> >> +    VLOG_ERR("cannot HW accelerate this flow due to unsupported
>> >> protocols");
>> >> +    return -1;
>> >> +}
>> >> +
>> >> +static int
>> >> +netdev_dpdk_destroy_rte_flow(struct netdev_dpdk *dev,
>> >> +                             const ovs_u128 *ufid,
>> >> +                             struct rte_flow *rte_flow) {
>> >> +    struct rte_flow_error error;
>> >> +    int ret;
>> >> +
>> >> +    ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
>> >> +    if (ret == 0) {
>> >> +        ufid_to_rte_flow_disassociate(ufid);
>> >> +        VLOG_DBG("removed rte flow %p associated with ufid "
>> >> + UUID_FMT
>> >> "\n",
>> >> +                 rte_flow, UUID_ARGS((struct uuid *)ufid));
>> >> +    } else {
>> >> +        VLOG_ERR("rte flow destroy error: %u : message : %s\n",
>> >> +                 error.type, error.message);
>> >> +    }
>> >> +
>> >> +    return ret;
>> >> +}
>> >> +
>> >> +static int
>> >> +netdev_dpdk_flow_put(struct netdev *netdev, struct match *match,
>> >> +                     struct nlattr *actions, size_t actions_len,
>> >> +                     const ovs_u128 *ufid, struct offload_info *info,
>> >> +                     struct dpif_flow_stats *stats OVS_UNUSED) {
>> >> +    struct rte_flow *rte_flow;
>> >> +    int ret;
>> >> +
>> >> +    /*
>> >> +     * If an old rte_flow exists, it means it's a flow modification.
>> >> +     * Here destroy the old rte flow first before adding a new one.
>> >> +     */
>> >> +    rte_flow = ufid_to_rte_flow_find(ufid);
>> >> +    if (rte_flow) {
>> >> +        ret = netdev_dpdk_destroy_rte_flow(netdev_dpdk_cast(netdev),
>> >> +                                           ufid, rte_flow);
>> >> +        if (ret < 0) {
>> >> +            return ret;
>> >> +        }
>> >> +    }
>> >> +
>> >> +    ret = netdev_dpdk_validate_flow(match);
>> >> +    if (ret < 0) {
>> >> +        return ret;
>> >> +    }
>> >> +
>> >> +    return netdev_dpdk_add_rte_flow_offload(netdev, match, actions,
>> >> +                                            actions_len, ufid,
>> >> + info); }
>> >> +
>> >> +static int
>> >> +netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
>> >> +                     struct dpif_flow_stats *stats OVS_UNUSED) {
>> >> +
>> >> +    struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid);
>> >> +
>> >> +    if (!rte_flow) {
>> >> +        return -1;
>> >> +    }
>> >> +
>> >> +    return netdev_dpdk_destroy_rte_flow(netdev_dpdk_cast(netdev),
>> >> +                                        ufid, rte_flow); }
>> >> +
>> >> +#define DPDK_FLOW_OFFLOAD_API                                 \
>> >> +    NULL,                   /* flow_flush */                  \
>> >> +    NULL,                   /* flow_dump_create */            \
>> >> +    NULL,                   /* flow_dump_destroy */           \
>> >> +    NULL,                   /* flow_dump_next */              \
>> >> +    netdev_dpdk_flow_put,                                     \
>> >> +    NULL,                   /* flow_get */                    \
>> >> +    netdev_dpdk_flow_del,                                     \
>> >> +    NULL                    /* init_flow_api */
>> >> +
>> >> +
>> >>  #define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT,    \
>> >>                            SET_CONFIG, SET_TX_MULTIQ, SEND,    \
>> >>                            GET_CARRIER, GET_STATS,  \ @@ -3707,7
>> >> +4264,7 @@ unlock:
>> >>      RXQ_RECV,                                                 \
>> >>      NULL,                       /* rx_wait */                 \
>> >>      NULL,                       /* rxq_drain */               \
>> >> -    NO_OFFLOAD_API                                            \
>> >> +    DPDK_FLOW_OFFLOAD_API                                     \
>> >>  }
>> >>
>> >>  static const struct netdev_class dpdk_class =
>> >> --
>> >> 2.7.4
>>
>> Disclaimer: This email and any files transmitted with it may contain
>> confidential information intended for the addressee(s) only. The
>> information is not to be surrendered or copied to unauthorized
>> persons. If you have received this communication in error, please
>> notify the sender immediately and delete this e-mail from your system.