[ovs-dev] [PATCH ovs RFC 7/9] dpif-hw-netlink: operate implementation

Paul Blakey paulb at mellanox.com
Tue Sep 27 12:46:02 UTC 2016


Add flow offload through tc, supporting
flow get, flow put, and flow del.

Signed-off-by: Paul Blakey <paulb at mellanox.com>
Signed-off-by: Shahar Klein <shahark at mellanox.com>
---
 lib/dpif-hw-netlink.c | 687 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 682 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-hw-netlink.c b/lib/dpif-hw-netlink.c
index 885827a..9473832 100644
--- a/lib/dpif-hw-netlink.c
+++ b/lib/dpif-hw-netlink.c
@@ -47,6 +47,74 @@
 
 VLOG_DEFINE_THIS_MODULE(dpif_hw_netlink);
 
+extern bool SKIP_HW;
+
+/* Returns a pointer to the payload of 'nla', that is, the byte located
+ * NLA_HDRLEN bytes past the start of the attribute.  The attribute length is
+ * not inspected. */
+static inline void *
+nla_data(const struct nlattr *nla)
+{
+    char *attr_start = (char *) nla;
+
+    return attr_start + NLA_HDRLEN;
+}
+
+/* Returns a printable name for the OVS key attribute 'type', for use in log
+ * messages.
+ *
+ * Attributes listed below yield a pointer to a string literal.  Any other
+ * value is formatted into a function-local static buffer; that result is
+ * overwritten by the next call made with an unlisted type, so it must be
+ * consumed before calling attrname() again. */
+static char *
+attrname(int type)
+{
+    static char unkowntype[64];
+
+    switch (type) {
+    case OVS_KEY_ATTR_UNSPEC:
+        return "OVS_KEY_ATTR_UNSPEC";
+    case OVS_KEY_ATTR_ENCAP:
+        return "OVS_KEY_ATTR_ENCAP";
+    case OVS_KEY_ATTR_PRIORITY:
+        return "OVS_KEY_ATTR_PRIORITY";
+    case OVS_KEY_ATTR_CT_LABELS:
+        return "OVS_KEY_ATTR_CT_LABELS";
+    case OVS_KEY_ATTR_IN_PORT:
+        return "OVS_KEY_ATTR_IN_PORT";
+    case OVS_KEY_ATTR_ETHERNET:
+        return "OVS_KEY_ATTR_ETHERNET";
+    case OVS_KEY_ATTR_VLAN:
+        return "OVS_KEY_ATTR_VLAN";
+    case OVS_KEY_ATTR_ETHERTYPE:
+        return "OVS_KEY_ATTR_ETHERTYPE";
+    case OVS_KEY_ATTR_IPV4:
+        return "OVS_KEY_ATTR_IPV4";
+    case OVS_KEY_ATTR_IPV6:
+        return "OVS_KEY_ATTR_IPV6";
+    case OVS_KEY_ATTR_TCP:
+        return "OVS_KEY_ATTR_TCP";
+    case OVS_KEY_ATTR_UDP:
+        return "OVS_KEY_ATTR_UDP";
+    case OVS_KEY_ATTR_ICMP:
+        return "OVS_KEY_ATTR_ICMP";
+    case OVS_KEY_ATTR_ICMPV6:
+        return "OVS_KEY_ATTR_ICMPV6";
+    case OVS_KEY_ATTR_ARP:
+        return "OVS_KEY_ATTR_ARP";
+    case OVS_KEY_ATTR_ND:
+        return "OVS_KEY_ATTR_ND";
+    case OVS_KEY_ATTR_SKB_MARK:
+        return "OVS_KEY_ATTR_SKB_MARK";
+    case OVS_KEY_ATTR_TUNNEL:
+        return "OVS_KEY_ATTR_TUNNEL";
+    case OVS_KEY_ATTR_SCTP:
+        return "OVS_KEY_ATTR_SCTP";
+    case OVS_KEY_ATTR_TCP_FLAGS:
+        return "OVS_KEY_ATTR_TCP_FLAGS";
+    case OVS_KEY_ATTR_DP_HASH:
+        return "OVS_KEY_ATTR_DP_HASH";
+    case OVS_KEY_ATTR_RECIRC_ID:
+        return "OVS_KEY_ATTR_RECIRC_ID";
+    case OVS_KEY_ATTR_MPLS:
+        return "OVS_KEY_ATTR_MPLS";
+    case OVS_KEY_ATTR_CT_STATE:
+        return "OVS_KEY_ATTR_CT_STATE";
+    case OVS_KEY_ATTR_CT_ZONE:
+        return "OVS_KEY_ATTR_CT_ZONE";
+    case OVS_KEY_ATTR_CT_MARK:
+        return "OVS_KEY_ATTR_CT_MARK";
+    default:
+        /* Bounded write, and no trailing newline: callers splice the name
+         * into the middle of their own log messages. */
+        snprintf(unkowntype, sizeof unkowntype, "unknown_type(%d)", type);
+        return unkowntype;
+    }
+}
+
 static char *
 printufid(const ovs_u128 * ovs_ufid)
 {
@@ -426,7 +494,7 @@ dpif_hw_tc_flow_to_dpif_flow(struct dpif_hw_netlink *dpif,
                                    sizeof (*ipv4_mask));
 
         memset(&ipv4_mask->ipv4_proto, 0xFF, sizeof (ipv4_mask->ipv4_proto));
-        ipv4->ipv4_proto = tc_flow->ip_proto;   
+        ipv4->ipv4_proto = tc_flow->ip_proto;
         ipv4_mask->ipv4_frag = UINT8_MAX;
 
         if (tc_flow->ip_type == 4) {
@@ -441,7 +509,7 @@ dpif_hw_tc_flow_to_dpif_flow(struct dpif_hw_netlink *dpif,
         }
 
         if (tc_flow->ip_proto == IPPROTO_ICMP) {
-            /* putting a masked out icmp  */
+            /* putting a masked out icmp */
             struct ovs_key_icmp *icmp =
                 nl_msg_put_unspec_uninit(outflow, OVS_KEY_ATTR_ICMP,
                                          sizeof (*icmp));
@@ -490,7 +558,7 @@ dpif_hw_tc_flow_to_dpif_flow(struct dpif_hw_netlink *dpif,
     size_t actions_offset =
         nl_msg_start_nested(outflow, OVS_FLOW_ATTR_ACTIONS);
     if (tc_flow->ifindex_out) {
-        /* TODO:  make this faster */
+        /* TODO: make this faster */
         int ovsport = get_ovs_port(dpif, tc_flow->ifindex_out);
 
         nl_msg_put_u32(outflow, OVS_ACTION_ATTR_OUTPUT, ovsport);
@@ -843,13 +911,622 @@ dpif_hw_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                                                              max_flows);
 }
 
+/* Returns true if every payload byte of the mask attribute 'ma' is zero. */
+static bool
+odp_mask_attr_is_wildcard(const struct nlattr *ma)
+{
+    const void *payload = nl_attr_get(ma);
+    size_t payload_len = nl_attr_get_size(ma);
+
+    return is_all_zeros(payload, payload_len);
+}
+
+/* Returns true if 'mask', the 'size'-byte mask payload for key attribute
+ * 'attr', is considered an exact match.
+ *
+ * Tunnel masks are never reported as exact.  TCP flags and IPv6 only require
+ * the bits selected below to be set, ARP ignores the bytes that follow
+ * 'arp_tha', and every other attribute must be all-ones over 'size' bytes. */
+static bool
+odp_mask_is_exact(enum ovs_key_attr attr, const void *mask, size_t size)
+{
+    if (attr == OVS_KEY_ATTR_TUNNEL) {
+        return false;
+    }
+
+    if (attr == OVS_KEY_ATTR_TCP_FLAGS) {
+        ovs_be16 flags_mask = *(ovs_be16 *) mask;
+
+        return TCP_FLAGS(flags_mask) == TCP_FLAGS(OVS_BE16_MAX);
+    }
+
+    if (attr == OVS_KEY_ATTR_IPV6) {
+        const struct ovs_key_ipv6 *ipv6_mask = mask;
+
+        /* Only the bits covered by IPV6_LABEL_MASK count for the label. */
+        if ((ipv6_mask->ipv6_label & htonl(IPV6_LABEL_MASK))
+            != htonl(IPV6_LABEL_MASK)) {
+            return false;
+        }
+        if (ipv6_mask->ipv6_proto != UINT8_MAX
+            || ipv6_mask->ipv6_tclass != UINT8_MAX
+            || ipv6_mask->ipv6_hlimit != UINT8_MAX
+            || ipv6_mask->ipv6_frag != UINT8_MAX) {
+            return false;
+        }
+        if (!ipv6_mask_is_exact((const struct in6_addr *)
+                                ipv6_mask->ipv6_src)) {
+            return false;
+        }
+        return ipv6_mask_is_exact((const struct in6_addr *)
+                                  ipv6_mask->ipv6_dst);
+    }
+
+    if (attr == OVS_KEY_ATTR_ARP) {
+        /* ARP key has padding, ignore it. */
+        BUILD_ASSERT_DECL(sizeof (struct ovs_key_arp) == 24);
+        BUILD_ASSERT_DECL(offsetof(struct ovs_key_arp, arp_tha) == 10 + 6);
+        size = offsetof(struct ovs_key_arp, arp_tha) + ETH_ADDR_LEN;
+
+        /* The 16-bit word right after 'arp_tha' must be zero. */
+        ovs_assert(((uint16_t *) mask)[size / 2] == 0);
+    }
+
+    return is_all_ones(mask, size);
+}
+
+/* Returns true if the mask attribute 'ma' is an exact match according to
+ * odp_mask_is_exact().  Tunnel attributes are rejected up front, before their
+ * payload is looked at. */
+static bool
+odp_mask_attr_is_exact(const struct nlattr *ma)
+{
+    enum ovs_key_attr attr = nl_attr_type(ma);
+
+    if (attr == OVS_KEY_ATTR_TUNNEL) {
+        return false;
+    }
+
+    return odp_mask_is_exact(attr, nl_attr_get(ma), nl_attr_get_size(ma));
+}
+
+/* Translates the netlink flow 'key' ('key_len' bytes) and its optional
+ * 'key_mask' ('key_mask_len' bytes) into 'tc_flow'.
+ *
+ * Returns 0 if every key attribute could be represented in 'tc_flow', so the
+ * flow is a candidate for offloading, and 1 otherwise.  On a nonzero return
+ * 'tc_flow' may already have been partially filled in.
+ *
+ * When no mask is supplied at all (null 'key_mask' or zero 'key_mask_len'),
+ * every key attribute is treated as an exact match. */
+static int
+parse_to_tc_flow(struct dpif_hw_netlink *dpif, struct tc_flow *tc_flow,
+                 const struct nlattr *key, int key_len,
+                 const struct nlattr *key_mask, int key_mask_len)
+{
+    size_t left;
+    const struct nlattr *a;
+    const struct nlattr *mask[__OVS_KEY_ATTR_MAX] = { 0 };
+
+    VLOG_DBG("parsing mask:\n");
+    NL_ATTR_FOR_EACH_UNSAFE(a, left, key_mask, key_mask_len) {
+        uint16_t type = nl_attr_type(a);
+
+        /* Never index 'mask' with a type it has no slot for.  A key attribute
+         * of such a type is rejected by the 'default' case below anyway. */
+        if (type < __OVS_KEY_ATTR_MAX) {
+            mask[type] = a;
+        }
+    }
+
+    VLOG_DBG("parsing key attributes:\n");
+    NL_ATTR_FOR_EACH_UNSAFE(a, left, key, key_len) {
+        uint16_t type = nl_attr_type(a);
+        const struct nlattr *ma =
+            type < __OVS_KEY_ATTR_MAX ? mask[type] : NULL;
+        bool is_wildcard = false;
+        bool is_exact = true;
+
+        if (key_mask && key_mask_len) {
+            /* With a mask present, an attribute missing from it counts as
+             * wildcarded. */
+            is_wildcard = ma ? odp_mask_attr_is_wildcard(ma) : true;
+            is_exact = ma ? odp_mask_attr_is_exact(ma) : false;
+        }
+
+        if (is_exact) {
+            VLOG_DBG("mask: %s exact: %p\n", attrname(type), ma);
+        } else if (is_wildcard) {
+            VLOG_DBG("mask: %s wildcard: %p\n", attrname(type), ma);
+        } else {
+            VLOG_DBG("mask %s is partial, ma: %p\n", attrname(type), ma);
+        }
+
+        switch (type) {
+        /* Attributes with no tc_flow representation: acceptable only when
+         * fully wildcarded. */
+        case OVS_KEY_ATTR_UNSPEC:
+        case OVS_KEY_ATTR_ENCAP:
+        case OVS_KEY_ATTR_PRIORITY:
+        case OVS_KEY_ATTR_SKB_MARK:
+        case OVS_KEY_ATTR_CT_STATE:
+        case OVS_KEY_ATTR_CT_ZONE:
+        case OVS_KEY_ATTR_CT_MARK:
+        case OVS_KEY_ATTR_CT_LABELS:
+        case OVS_KEY_ATTR_ND:
+        case OVS_KEY_ATTR_MPLS:
+        case OVS_KEY_ATTR_DP_HASH:
+        case OVS_KEY_ATTR_TUNNEL:
+        case OVS_KEY_ATTR_SCTP:
+        case OVS_KEY_ATTR_VLAN:
+        case OVS_KEY_ATTR_IPV6:
+        case OVS_KEY_ATTR_ICMP:
+        case OVS_KEY_ATTR_ARP:
+        case OVS_KEY_ATTR_ICMPV6:
+            if (is_wildcard) {
+                VLOG_DBG("unsupported key attribute: %s is wildcard\n",
+                         attrname(type));
+                break;
+            }
+            VLOG_ERR("unsupported key attribute: %s is not wildcard\n",
+                     attrname(type));
+            return 1;
+
+        case OVS_KEY_ATTR_TCP_FLAGS:
+        case OVS_KEY_ATTR_RECIRC_ID:
+            /* IGNORE this attributes for now, (might disable some of it in
+             * probe? */
+            VLOG_DBG
+                ("ignoring attribute %s -- fix me, exact: %s, wildcard: %s, partial: %s\n",
+                 attrname(type), is_exact ? "yes" : "no",
+                 is_wildcard ? "yes" : "no", (!is_wildcard
+                                              && !is_exact) ? "yes" : "no");
+            break;
+
+        case OVS_KEY_ATTR_IN_PORT: {
+            if (!is_exact) {
+                VLOG_ERR("%s isn't exact, can't offload!\n", attrname(type));
+                return 1;
+            }
+
+            VLOG_DBG("in_port(%d)\n", nl_attr_get_u32(a));
+            tc_flow->ovs_inport = nl_attr_get_u32(a);
+            tc_flow->indev = port_find(dpif, tc_flow->ovs_inport);
+            tc_flow->ifindex =
+                tc_flow->indev ? netdev_get_ifindex(tc_flow->indev) : 0;
+            /* Port number 0 and ports without a usable ifindex cannot be
+             * offloaded. */
+            if (!tc_flow->ovs_inport || !tc_flow->ifindex) {
+                VLOG_ERR
+                    ("RESULT: not found inport: %d or ifindex: %d for ovs in_port: %d\n",
+                     tc_flow->ovs_inport, tc_flow->ifindex,
+                     tc_flow->ovs_inport);
+                return 1;
+            }
+            break;
+        }
+
+        case OVS_KEY_ATTR_ETHERNET: {
+            const struct ovs_key_ethernet *eth_key = 0;
+            struct ovs_key_ethernet full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+            if (!SKIP_HW) {
+                /* Drop the supplied mask so that the all-ones mask is used
+                 * instead; see the log message for the reason. */
+                ma = 0;
+                VLOG_DBG
+                    ("%s %d %s: FORCING FULL MASK ON ETH MAC ADDRESS, because of \"syndrome command failed, status bad parameter(0x3), syndrome 0x3ad328\"",
+                     __FILE__, __LINE__, __func__);
+            }
+            const struct ovs_key_ethernet *eth_key_mask =
+                ma ? nla_data(ma) : &full_mask;
+            eth_key = nla_data(a);
+
+            const struct eth_addr *src = &eth_key->eth_src;
+            const struct eth_addr *src_mask = &eth_key_mask->eth_src;
+            const struct eth_addr *dst = &eth_key->eth_dst;
+            const struct eth_addr *dst_mask = &eth_key_mask->eth_dst;
+
+            memcpy(&tc_flow->src_mac, src, sizeof (tc_flow->src_mac));
+            memcpy(&tc_flow->src_mac_mask, src_mask,
+                   sizeof (tc_flow->src_mac_mask));
+            memcpy(&tc_flow->dst_mac, dst, sizeof (tc_flow->dst_mac));
+            memcpy(&tc_flow->dst_mac_mask, dst_mask,
+                   sizeof (tc_flow->dst_mac_mask));
+
+            VLOG_DBG("eth(src=" ETH_ADDR_FMT ", src_mask=" ETH_ADDR_FMT
+                     ", dst=" ETH_ADDR_FMT ", dst_mask=" ETH_ADDR_FMT "\n",
+                     ETH_ADDR_ARGS(tc_flow->src_mac),
+                     ETH_ADDR_ARGS(tc_flow->src_mac_mask),
+                     ETH_ADDR_ARGS(tc_flow->dst_mac),
+                     ETH_ADDR_ARGS(tc_flow->dst_mac_mask));
+            break;
+        }
+
+        case OVS_KEY_ATTR_ETHERTYPE: {
+            if (!is_exact) {
+                VLOG_ERR("attribute %s isn't exact, can't offload!\n",
+                         attrname(type));
+                return 1;
+            }
+
+            tc_flow->eth_type = nl_attr_get_be16(a);
+            VLOG_DBG("eth_type(0x%04x)\n", ntohs(tc_flow->eth_type));
+            break;
+        }
+
+        case OVS_KEY_ATTR_IPV4: {
+            const struct ovs_key_ipv4 *ipv4 = nla_data(a);
+            struct ovs_key_ipv4 full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+            /* NOTE(review): a mask attribute that is absent falls back to
+             * all-ones here (and likewise for TCP/UDP below), although
+             * 'is_wildcard' above treats an absent mask attribute as a
+             * wildcard -- confirm which one is intended. */
+            const struct ovs_key_ipv4 *ipv4_mask =
+                ma ? nla_data(ma) : &full_mask;
+
+            if (ipv4_mask->ipv4_frag) {
+                VLOG_WARN
+                    ("*** ignoring exact or partial mask on unsupported ipv4_frag, mask: %x",
+                     ipv4_mask->ipv4_frag);
+            }
+
+            if (ipv4_mask->ipv4_ttl || ipv4_mask->ipv4_tos) {
+                VLOG_ERR
+                    ("ipv4 mask exact or partial one of unsupported sub attributes (ttl: %x, tos: %x, frag: %x)\n",
+                     ipv4_mask->ipv4_ttl, ipv4_mask->ipv4_tos,
+                     ipv4_mask->ipv4_frag);
+                return 1;
+            }
+
+            if (ipv4_mask->ipv4_proto != 0
+                && ipv4_mask->ipv4_proto != 0xFF) {
+                VLOG_WARN
+                    ("*** ignoring partial mask on ipv4_proto, taking exact ip_proto: %d (%x)\n",
+                     ipv4_mask->ipv4_proto, ipv4->ipv4_proto);
+            }
+
+            /* If not wildcard out, take exact match for ipv4_proto
+             * (ignoring mask) */
+            if (ipv4_mask->ipv4_proto != 0) {
+                tc_flow->ip_proto = ipv4->ipv4_proto;
+            }
+
+            if (ipv4_mask->ipv4_src) {
+                tc_flow->ipv4.ipv4_src = ipv4->ipv4_src;
+                tc_flow->ipv4.ipv4_src_mask = ipv4_mask->ipv4_src;
+            }
+            if (ipv4_mask->ipv4_dst) {
+                tc_flow->ipv4.ipv4_dst = ipv4->ipv4_dst;
+                tc_flow->ipv4.ipv4_dst_mask = ipv4_mask->ipv4_dst;
+            }
+            tc_flow->ip_type = 4;
+            break;
+        }
+
+        case OVS_KEY_ATTR_TCP: {
+            struct ovs_key_tcp full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+            const struct ovs_key_tcp *tcp_mask =
+                ma ? nla_data(ma) : &full_mask;
+            const struct ovs_key_tcp *tcp = nla_data(a);
+
+            /* Only ports with a nonzero mask are copied into 'tc_flow'. */
+            if (tcp_mask->tcp_src) {
+                tc_flow->src_port = tcp->tcp_src;
+                tc_flow->src_port_mask = tcp_mask->tcp_src;
+            }
+            if (tcp_mask->tcp_dst) {
+                tc_flow->dst_port = tcp->tcp_dst;
+                tc_flow->dst_port_mask = tcp_mask->tcp_dst;
+            }
+
+            VLOG_DBG("tcp(src=%d, msk: 0x%x, dst=%d, msk: 0x%x)\n",
+                     htons(tcp->tcp_src), htons(tcp_mask->tcp_src),
+                     htons(tcp->tcp_dst), htons(tcp_mask->tcp_dst));
+            break;
+        }
+
+        case OVS_KEY_ATTR_UDP: {
+            struct ovs_key_udp full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+            const struct ovs_key_udp *udp_mask =
+                ma ? nla_data(ma) : &full_mask;
+            const struct ovs_key_udp *udp = nla_data(a);
+
+            /* Only ports with a nonzero mask are copied into 'tc_flow'. */
+            if (udp_mask->udp_src) {
+                tc_flow->src_port = udp->udp_src;
+                tc_flow->src_port_mask = udp_mask->udp_src;
+            }
+            if (udp_mask->udp_dst) {
+                tc_flow->dst_port = udp->udp_dst;
+                tc_flow->dst_port_mask = udp_mask->udp_dst;
+            }
+            VLOG_DBG("udp(src=%d/0x%x, dst=%d/0x%x)\n",
+                     htons(udp->udp_src), htons(udp_mask->udp_src),
+                     htons(udp->udp_dst), htons(udp_mask->udp_dst));
+            break;
+        }
+
+        case __OVS_KEY_ATTR_MAX:
+        default:
+            VLOG_ERR("unknown (default/max) key attribute: %s\n",
+                     attrname(type));
+            return 1;
+        }
+    }
+    VLOG_DBG("--- finished parsing attr - can offload!\n");
+    return 0;
+
+}
+
+/* Tries to install the flow described by 'put' in hardware through tc.
+ *
+ * Returns DPIF_HW_OFFLOAD_ONLY if the flow was handed to tc successfully, in
+ * which case the caller must not forward 'put' to the software datapath.
+ * Returns DPIF_HW_NO_OFFLAOAD in every other case: probe requests, a policy
+ * that forbids offloading, a key or action list that cannot be expressed as
+ * a tc rule, or a tc failure.
+ *
+ * If a tc rule already exists for the flow and the policy is now
+ * DPIF_HW_NO_OFFLAOAD, the rule is deleted and DPIF_FP_CREATE is set on
+ * 'put' so that the software datapath creates the flow. */
+static enum dpif_hw_offload_policy
+parse_flow_put(struct dpif_hw_netlink *dpif, struct dpif_flow_put *put)
+{
+    const struct nlattr *a;
+    size_t left;
+    struct netdev *in = 0;
+    enum dpif_hw_offload_policy policy;
+
+    int probe_feature = ((put->flags & DPIF_FP_PROBE) ? 1 : 0);
+
+    if (probe_feature) {
+        VLOG_DBG("\n.\nPROBE REQUEST!\n.\n");
+        /* see usage at dpif_probe_feature, we might want to intercept and
+         * disable some features */
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+    int cmd =
+        put->flags & DPIF_FP_CREATE ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET;
+    if (!put->ufid) {
+        VLOG_INFO
+            ("%s %d %s missing ufid for flow put, might be from dpctl add-flow.",
+             __FILE__, __LINE__, __func__);
+    }
+
+    policy = HW_offload_test_put(dpif, put);
+    if (put->ufid) {
+        put_policy(dpif, put->ufid, policy);
+    }
+    /* NOTE(review): 'put->ufid' may be null here (see the message above) and
+     * is still passed to gethandle() and, further down, puthandle() --
+     * confirm that those helpers accept a null ufid. */
+    int proto = 0;
+    int handle =
+        gethandle(dpif, put->ufid, &in, &proto, "DPIF_OP_FLOW_PUT", 1);
+    if (handle && proto && (policy == DPIF_HW_NO_OFFLAOAD)) {
+        /* The flow is currently offloaded but no longer allowed to be:
+         * remove the tc rule and let the software datapath create it. */
+        put->flags |= DPIF_FP_CREATE;
+        int ifindex = netdev_get_ifindex(in);
+
+        tc_del_flower(ifindex, handle, proto);
+        delhandle(dpif, put->ufid);
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    if (policy == DPIF_HW_NO_OFFLAOAD) {
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    if (cmd == OVS_FLOW_CMD_NEW) {
+        VLOG_DBG("cmd is OVS_FLOW_CMD_NEW - create\n");
+    } else {
+        VLOG_DBG("cmd is OVS_FLOW_CMD_SET - modify\n");
+    }
+
+    if (put->flags & DPIF_FP_ZERO_STATS && cmd == OVS_FLOW_CMD_SET) {
+        VLOG_WARN
+            ("We need to zero the stats of a modified flow, not implemented, ignored\n");
+    }
+
+    if (put->stats) {
+        VLOG_WARN("FLOW PUT WANTS STATS\n");
+    }
+
+    /* if not present, and cmd == OVS_FLOW_CMD_SET, means don't modify ACTIONs
+     * (which we wrongly parse as a drop rule) see include/odp-netlink.h +:490
+     * to clear actions with OVS_FLOW_CMD_SET, actions will be present but
+     * empty */
+    if (!put->key) {
+        VLOG_ERR("%s %d %s error ,missing key, cmd: %d!", __FILE__, __LINE__,
+                 __func__, cmd);
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+    if (!put->actions) {
+        if (cmd == OVS_FLOW_CMD_NEW) {
+            VLOG_WARN("%s %d %s error missing actions on cmd new!", __FILE__,
+                      __LINE__, __func__);
+        } else {
+            VLOG_WARN
+                ("%s %d %s missing actions on cmd modify, find and modify key only",
+                 __FILE__, __LINE__, __func__);
+        }
+    }
+
+    /* First pass over the actions: only count the output actions, to decide
+     * below whether the flow can be offloaded at all. */
+    int outport_count = 0;
+
+    VLOG_DBG("parsing actions\n");
+    NL_ATTR_FOR_EACH_UNSAFE(a, left, put->actions, put->actions_len) {
+        if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT) {
+            VLOG_WARN("output to port: %d\n", nl_attr_get_u32(a));
+            outport_count++;
+        }
+    }
+    if (outport_count == 0) {
+        VLOG_DBG("output to port: drop\n");
+    }
+
+    /* Debug dump of the ufid, key/mask and actions. */
+    struct ds ds;
+
+    ds_init(&ds);
+    ds_clear(&ds);
+    if (put->ufid) {
+        odp_format_ufid(put->ufid, &ds);
+        ds_put_cstr(&ds, ", ");
+    }
+
+    ds_put_cstr(&ds, "verbose: ");
+    odp_flow_format(put->key, put->key_len, put->mask, put->mask_len, 0, &ds,
+                    true);
+    ds_put_cstr(&ds, ", not_verbose: ");
+    odp_flow_format(put->key, put->key_len, put->mask, put->mask_len, 0, &ds,
+                    false);
+
+    /* can also use dpif_flow_stats_format(&f->stats, ds) to print stats */
+
+    ds_put_cstr(&ds, ", actions:");
+    format_odp_actions(&ds, put->actions, put->actions_len);
+    VLOG_DBG("%s\n", ds_cstr(&ds));
+    ds_destroy(&ds);
+
+    /* parse tc_flow */
+    struct tc_flow tc_flow;
+
+    memset(&tc_flow, 0, sizeof (tc_flow));
+    tc_flow.handle = handle;
+    int cant_offload =
+        parse_to_tc_flow(dpif, &tc_flow, put->key, put->key_len, put->mask,
+                         put->mask_len);
+
+    /* No existing handle means the tc rule is new and must be recorded with
+     * puthandle() once it has been installed. */
+    int new = handle ? 0 : 1;
+
+    VLOG_DBG
+        ("cant_offload: %d ifindex: %d, eth_type: %x, ip_proto: %d,  outport_count: %d\n",
+         cant_offload, tc_flow.ifindex, ntohs(tc_flow.eth_type),
+         tc_flow.ip_proto, outport_count);
+    if (!cant_offload && tc_flow.ifindex && tc_flow.eth_type
+        && outport_count <= 1) {
+        VLOG_DBG("RESULT: %p, ***** offloading (HW_ONLY!)\n", dpif);
+        if (cmd != OVS_FLOW_CMD_NEW && !handle) {
+            /* modify and flow is now offloadable, remove from kernel netlink
+             * datapath */
+            /* NOTE(review): if the tc install below then fails, 'put' is
+             * returned to the caller as a modify of a flow that has just been
+             * deleted -- confirm whether DPIF_FP_CREATE should be set on that
+             * path, as is done above when a tc rule is removed. */
+            int del_error =
+                dpif_flow_del(dpif->lp_dpif_netlink, put->key, put->key_len,
+                              put->ufid, PMD_ID_NULL, NULL);
+
+            if (!del_error) {
+                VLOG_DBG("modify, deleted old flow and offloading new\n");
+            } else {
+                VLOG_ERR("modify, error: %d\n", del_error);
+            }
+        }
+
+        /* Result of installing the rule.  It must be the one variable checked
+         * after the loop: a failure to install the forwarding rule has to be
+         * reported as "not offloaded" so the flow still reaches software. */
+        int error = 0;
+
+        outport_count = 0;
+        NL_ATTR_FOR_EACH_UNSAFE(a, left, put->actions, put->actions_len) {
+            if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT) {
+                outport_count++;
+
+                tc_flow.ovs_outport = nl_attr_get_u32(a);
+                tc_flow.outdev = port_find(dpif, tc_flow.ovs_outport);
+                tc_flow.ifindex_out =
+                    tc_flow.outdev ? netdev_get_ifindex(tc_flow.outdev) : 0;
+                if (tc_flow.ifindex_out) {
+                    VLOG_DBG
+                        (" **** handle: %d, new? %d, adding %d -> %d (ifindex: %d -> %d)\n",
+                         tc_flow.handle, new, tc_flow.ovs_inport,
+                         tc_flow.ovs_outport, tc_flow.ifindex,
+                         tc_flow.ifindex_out);
+                    error = tc_replace_flower(&tc_flow);
+
+                    if (!error) {
+                        if (new) {
+                            puthandle(dpif, put->ufid, tc_flow.indev,
+                                      tc_flow.ovs_inport, tc_flow.handle,
+                                      tc_flow.eth_type);
+                        }
+
+                        VLOG_DBG(" **** offloaded! handle: %d (%x)\n",
+                                 tc_flow.handle, tc_flow.handle);
+                    } else {
+                        VLOG_ERR
+                            (" **** error! adding fwd rule! tc error: %d\n",
+                             error);
+                    }
+                } else {
+                    VLOG_ERR
+                        (" **** error! not found output port %d, ifindex: %d\n",
+                         tc_flow.ovs_outport, tc_flow.ifindex_out);
+                    /* Nothing was installed: make sure this is not reported
+                     * as a successful offload. */
+                    error = ENOENT;
+                    break;
+                }
+            }
+        }
+        if (!outport_count) {
+            /* No output action: install the rule without an output device. */
+            VLOG_DBG
+                (" ***** handle: %d, new? %d, adding %d -> DROP (ifindex: %d -> DROP)\n",
+                 tc_flow.handle, new, tc_flow.ovs_inport, tc_flow.ifindex);
+            error = tc_replace_flower(&tc_flow);
+            if (!error) {
+                if (new) {
+                    puthandle(dpif, put->ufid, tc_flow.indev,
+                              tc_flow.ovs_inport, tc_flow.handle,
+                              tc_flow.eth_type);
+                }
+
+                VLOG_DBG(" **** offloaded! handle: %d (%x)\n", tc_flow.handle,
+                         tc_flow.handle);
+            } else {
+                VLOG_ERR(" **** error adding drop rule! tc error: %d\n",
+                         error);
+            }
+        }
+
+        if (error) {
+            return DPIF_HW_NO_OFFLAOAD;
+        }
+        return DPIF_HW_OFFLOAD_ONLY;
+    }
+
+    VLOG_DBG("RESULT: SW\n");
+
+    return DPIF_HW_NO_OFFLAOAD;
+}
+
+/* Serves the flow lookup 'get' from tc when a tc handle is recorded for
+ * 'get->ufid'.
+ *
+ * Returns DPIF_HW_OFFLOAD_ONLY after filling 'get->flow' from the tc rule.
+ * Returns DPIF_HW_NO_OFFLAOAD if there is no handle or protocol for the ufid,
+ * if the input device has no OVS port, or if tc_get_flower() fails. */
+static enum dpif_hw_offload_policy
+parse_flow_get(struct dpif_hw_netlink *dpif, struct dpif_flow_get *get)
+{
+    struct netdev *in = 0;
+    int protocol = 0;
+    int handle =
+        gethandle(dpif, get->ufid, &in, &protocol, "DPIF_OP_FLOW_GET", 1);
+
+    if (!handle || !protocol) {
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    int ifindex = netdev_get_ifindex(in);
+    int ovs_port = get_ovs_port(dpif, ifindex);
+
+    if (ovs_port == -1) {
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    struct tc_flow tc_flow;
+
+    if (tc_get_flower(ifindex, handle, protocol, &tc_flow)) {
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    dpif_hw_tc_flow_to_dpif_flow(dpif, &tc_flow, get->flow, ovs_port,
+                                 get->buffer, in);
+    return DPIF_HW_OFFLOAD_ONLY;
+}
+
+/* Deletes the tc rule recorded for 'del->ufid', if any.
+ *
+ * The handle and policy entries for the ufid are dropped whether or not a tc
+ * rule exists and whether or not its deletion succeeds.
+ *
+ * Returns DPIF_HW_OFFLOAD_ONLY if a tc rule was found and deleted, and
+ * DPIF_HW_NO_OFFLAOAD otherwise. */
+static enum dpif_hw_offload_policy
+parse_flow_del(struct dpif_hw_netlink *dpif, struct dpif_flow_del *del)
+{
+    struct netdev *in = 0;
+    int protocol = 0;
+    int handle =
+        gethandle(dpif, del->ufid, &in, &protocol, "DPIF_OP_FLOW_DEL", 1);
+
+    /* we delete the handle anyway (even if not deleted from tc) */
+    delhandle(dpif, del->ufid);
+    del_policy(dpif, del->ufid);
+
+    if (!handle || !protocol) {
+        VLOG_DBG("del with no handle/ufid/protocol, SW only\n");
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    int ifindex = netdev_get_ifindex(in);
+
+    VLOG_DBG("deleting ufid %s, handle %d, protocol: %d, ifindex: %d\n",
+             printufid(del->ufid), handle, protocol, ifindex);
+    int error = tc_del_flower(ifindex, handle, protocol);
+
+    if (error) {
+        VLOG_ERR("DELETE FAILED: tc error: %d\n", error);
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    VLOG_DBG("DELETE SUCCESS!\n");
+    return DPIF_HW_OFFLOAD_ONLY;
+}
+
+/* Dispatches 'op' to the flow put/get/del handler that matches its type and
+ * returns that handler's verdict.  Every other operation type, including
+ * DPIF_OP_EXECUTE, yields DPIF_HW_NO_OFFLAOAD. */
+static enum dpif_hw_offload_policy
+parse_operate(struct dpif_hw_netlink *dpif, struct dpif_op *op)
+{
+    enum dpif_hw_offload_policy verdict = DPIF_HW_NO_OFFLAOAD;
+
+    switch (op->type) {
+    case DPIF_OP_FLOW_PUT:
+        VLOG_DBG("DPIF_OP_FLOW_PUT");
+        verdict = parse_flow_put(dpif, &op->u.flow_put);
+        break;
+    case DPIF_OP_FLOW_GET:
+        VLOG_DBG("DPIF_OP_FLOW_GET");
+        verdict = parse_flow_get(dpif, &op->u.flow_get);
+        break;
+    case DPIF_OP_FLOW_DEL:
+        VLOG_DBG("DPIF_OP_FLOW_DEL");
+        verdict = parse_flow_del(dpif, &op->u.flow_del);
+        break;
+
+    case DPIF_OP_EXECUTE:
+    default:
+        break;
+    }
+
+    return verdict;
+}
+
 static void
 dpif_hw_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
 {
     struct dpif_hw_netlink *dpif = dpif_hw_netlink_cast(dpif_);
 
-    return dpif->lp_dpif_netlink->dpif_class->operate(dpif->lp_dpif_netlink,
-                                                      ops, n_ops);
+    struct dpif_op **new_ops = xmalloc(sizeof (struct dpif_op *) * n_ops);
+    int n_new_ops = 0;
+    int i = 0;
+
+    for (i = 0; i < n_ops; i++) {
+        if (parse_operate(dpif, ops[i]) == DPIF_HW_OFFLOAD_ONLY) {
+            ops[i]->error = 0;
+        } else
+            new_ops[n_new_ops++] = ops[i];
+    }
+    dpif->lp_dpif_netlink->dpif_class->operate(dpif->lp_dpif_netlink, new_ops,
+                                               n_new_ops);
+    free(new_ops);
 }
 
 static int
-- 
1.8.3.1




More information about the dev mailing list