[ovs-dev] [PATCH 4/7] netdev-dpdk: implement flow put with rte flow
Darrell Ball
dball at vmware.com
Tue Aug 29 08:11:23 UTC 2017
On 8/22/17, 11:24 PM, "Yuanhan Liu" <yliu at fridaylinux.org> wrote:
From: Finn Christensen <fc at napatech.com>
The basic yet major part of this patch is to translate the "match"
to rte flow patterns. We then create a rte flow with a MARK action;
afterwards, all pkts matching the flow will carry the mark id in the mbuf.
For any unsupported flows, such as MPLS, -1 is returned, meaning the
flow offload has failed and the flow is then skipped.
Signed-off-by: Finn Christensen <fc at napatech.com>
Signed-off-by: Yuanhan Liu <yliu at fridaylinux.org>
---
lib/netdev-dpdk.c | 385 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 384 insertions(+), 1 deletion(-)
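Just for context on the MARK action described in the commit message: as far
as I understand DPDK 17.08, the mark is delivered on the rx side through the
mbuf's FDIR fields when PKT_RX_FDIR_ID is set. A minimal, untested sketch of
how a consumer (e.g. a later patch in this series) could read it back — the
helper name here is invented for illustration only:

    static inline bool
    dpdk_mbuf_get_flow_mark(const struct rte_mbuf *mbuf, uint32_t *mark)
    {
        /* RTE_FLOW_ACTION_TYPE_MARK delivers the id via the FDIR mbuf fields. */
        if (mbuf->ol_flags & PKT_RX_FDIR_ID) {
            *mark = mbuf->hash.fdir.hi;
            return true;
        }
        return false;
    }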
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 15fef1e..8089da8 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -58,6 +58,7 @@
#include "smap.h"
#include "sset.h"
#include "unaligned.h"
+#include "uuid.h"
#include "timeval.h"
#include "unixctl.h"
@@ -3398,6 +3399,388 @@ get_rte_flow_by_ufid(const ovs_u128 *ufid)
}
+#define MAX_RTE_FLOW_ITEMS 100
+#define MAX_RTE_FLOW_ACTIONS 100
I guess these are temporary
Do we need to do a rte query during initialization ?
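AFAIK rte_flow has no capability query for the maximum pattern/action count
at init time; the closest thing is validating a fully built flow. Untested
sketch of what could be done right before the rte_flow_create() call further
down, reusing the flow_attr/patterns/actions/error already in scope there:

    /* Ask the PMD whether it would accept this flow before creating it. */
    if (rte_flow_validate(dev->port_id, &flow_attr, patterns.items,
                          actions.actions, &error)) {
        VLOG_DBG("flow not supported by hardware: %s",
                 error.message ? error.message : "unknown");
        goto err;
    }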
+
+static int
+netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
+ const struct match *match,
+ struct nlattr *nl_actions OVS_UNUSED,
+ size_t actions_len,
+ const ovs_u128 *ufid,
+ struct offload_info *info)
+{
+ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+ const struct rte_flow_attr flow_attr = {
+ .group = 0,
+ .priority = 0,
+ .ingress = 1,
+ .egress = 0
+ };
+ struct flow_patterns {
+ struct rte_flow_item items[MAX_RTE_FLOW_ITEMS];
+ uint32_t cnt;
+ } patterns;
+ struct flow_actions {
+ struct rte_flow_action actions[MAX_RTE_FLOW_ACTIONS];
+ uint32_t cnt;
+ } actions;
+ struct rte_flow *flow;
+ struct rte_flow_error error;
+
+ uint8_t *ipv4_next_proto_mask = NULL;
+
+#define ADD_FLOW_PATTERN(type_, spec_, mask_) do { \
+ patterns.items[patterns.cnt].type = type_; \
+ patterns.items[patterns.cnt].spec = spec_; \
+ patterns.items[patterns.cnt].mask = mask_; \
+ patterns.items[patterns.cnt].last = NULL; \
+ if (patterns.cnt < MAX_RTE_FLOW_ITEMS) { \
+ patterns.cnt++; \
+ } else { \
+ VLOG_ERR("too many rte flow patterns (> %d)", MAX_RTE_FLOW_ITEMS); \
+ goto err; \
+ } \
+} while (0)
static (inline) function maybe ?
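Agreed a helper would read better. If struct flow_patterns is hoisted out of
the function, something along these lines might work (untested sketch; the
helper name is just a suggestion), with callers checking the return value:

    static int
    add_flow_pattern(struct flow_patterns *patterns,
                     enum rte_flow_item_type type,
                     const void *spec, const void *mask)
    {
        if (patterns->cnt >= MAX_RTE_FLOW_ITEMS) {
            VLOG_ERR("too many rte flow patterns (> %d)", MAX_RTE_FLOW_ITEMS);
            return -1;
        }
        patterns->items[patterns->cnt].type = type;
        patterns->items[patterns->cnt].spec = spec;
        patterns->items[patterns->cnt].mask = mask;
        patterns->items[patterns->cnt].last = NULL;
        patterns->cnt++;
        return 0;
    }

The action side below could get an equivalent add_flow_action() helper, which
would also address the next comment.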
+
+#define ADD_FLOW_ACTION(type_, conf_) do { \
+ actions.actions[actions.cnt].type = type_; \
+ actions.actions[actions.cnt].conf = conf_; \
+ actions.cnt++; \
+ ovs_assert(actions.cnt < MAX_RTE_FLOW_ACTIONS); \
+} while (0)
static (inline) function maybe ?
+
+ patterns.cnt = 0;
+ actions.cnt = 0;
+
+ /* Eth */
+ struct rte_flow_item_eth eth_spec;
+ struct rte_flow_item_eth eth_mask;
+ memset(&eth_mask, 0, sizeof(eth_mask));
+ if (match->wc.masks.dl_src.be16[0] ||
+ match->wc.masks.dl_src.be16[1] ||
+ match->wc.masks.dl_src.be16[2] ||
+ match->wc.masks.dl_dst.be16[0] ||
+ match->wc.masks.dl_dst.be16[1] ||
+ match->wc.masks.dl_dst.be16[2]) {
+ rte_memcpy(&eth_spec.dst, &match->flow.dl_dst, sizeof(eth_spec.dst));
+ rte_memcpy(&eth_spec.src, &match->flow.dl_src, sizeof(eth_spec.src));
+ eth_spec.type = match->flow.dl_type;
+
+ rte_memcpy(&eth_mask.dst, &match->wc.masks.dl_dst, sizeof(eth_mask.dst));
+ rte_memcpy(&eth_mask.src, &match->wc.masks.dl_src, sizeof(eth_mask.src));
+ eth_mask.type = match->wc.masks.dl_type;
+
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_ETH, &eth_spec, &eth_mask);
+ } else {
+ /*
+ * If the user specifies a flow (like a UDP flow) without L2 patterns,
+ * OVS will at least set the dl_type. Normally, it's enough to
+ * create an eth pattern just with it. Unfortunately, some Intel
+ * NICs (such as the XL710) don't support that. Below is a workaround,
+ * which simply matches any L2 pkts.
+ */
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_ETH, &eth_spec, &eth_mask);
+ }
+
+ /* VLAN */
+ struct rte_flow_item_vlan vlan_spec;
+ struct rte_flow_item_vlan vlan_mask;
+ memset(&vlan_mask, 0, sizeof(vlan_mask));
+ if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
+ vlan_spec.tci = match->flow.vlans[0].tci;
+ vlan_mask.tci = match->wc.masks.vlans[0].tci;
+
+ /* match any protocols */
+ vlan_mask.tpid = 0;
+
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_VLAN, &vlan_spec, &vlan_mask);
+ }
+
+ /* IP v4 */
+ uint8_t proto = 0;
+ struct rte_flow_item_ipv4 ipv4_spec;
+ struct rte_flow_item_ipv4 ipv4_mask;
+ memset(&ipv4_mask, 0, sizeof(ipv4_mask));
+ if ((match->flow.dl_type == ntohs(ETH_TYPE_IP)) &&
+ (match->wc.masks.nw_src || match->wc.masks.nw_dst ||
+ match->wc.masks.nw_tos || match->wc.masks.nw_ttl ||
+ match->wc.masks.nw_proto)) {
+ ipv4_spec.hdr.type_of_service = match->flow.nw_tos;
+ ipv4_spec.hdr.time_to_live = match->flow.nw_ttl;
+ ipv4_spec.hdr.next_proto_id = match->flow.nw_proto;
+ ipv4_spec.hdr.src_addr = match->flow.nw_src;
+ ipv4_spec.hdr.dst_addr = match->flow.nw_dst;
+
+ ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos;
+ ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl;
+ ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto;
+ ipv4_mask.hdr.src_addr = match->wc.masks.nw_src;
+ ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst;
+
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_IPV4, &ipv4_spec, &ipv4_mask);
+
+ /* Save proto for L4 protocol setup */
+ proto = ipv4_spec.hdr.next_proto_id & ipv4_mask.hdr.next_proto_id;
+
+ /* Remember proto mask address for later modification */
+ ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id;
+ }
+
+ if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP &&
+ proto != IPPROTO_SCTP && proto != IPPROTO_TCP &&
+ (match->wc.masks.tp_src ||
+ match->wc.masks.tp_dst ||
+ match->wc.masks.tcp_flags)) {
+ VLOG_INFO("L4 Protocol (%u) not supported", proto);
+ goto err;
+ }
+
+ struct rte_flow_item_udp udp_spec;
+ struct rte_flow_item_udp udp_mask;
+ memset(&udp_mask, 0, sizeof(udp_mask));
+ if ((proto == IPPROTO_UDP) &&
+ (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
+ udp_spec.hdr.src_port = match->flow.tp_src;
+ udp_spec.hdr.dst_port = match->flow.tp_dst;
+
+ udp_mask.hdr.src_port = match->wc.masks.tp_src;
+ udp_mask.hdr.dst_port = match->wc.masks.tp_dst;
+
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_UDP, &udp_spec, &udp_mask);
+
+ /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match */
+ if (ipv4_next_proto_mask) {
+ *ipv4_next_proto_mask = 0;
+ }
+ }
+
+ struct rte_flow_item_sctp sctp_spec;
+ struct rte_flow_item_sctp sctp_mask;
+ memset(&sctp_mask, 0, sizeof(sctp_mask));
+ if ((proto == IPPROTO_SCTP) &&
+ (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
+ sctp_spec.hdr.src_port = match->flow.tp_src;
+ sctp_spec.hdr.dst_port = match->flow.tp_dst;
+
+ sctp_mask.hdr.src_port = match->wc.masks.tp_src;
+ sctp_mask.hdr.dst_port = match->wc.masks.tp_dst;
+
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_SCTP, &sctp_spec, &sctp_mask);
+
+ /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match */
+ if (ipv4_next_proto_mask) {
+ *ipv4_next_proto_mask = 0;
+ }
+ }
+
+ struct rte_flow_item_icmp icmp_spec;
+ struct rte_flow_item_icmp icmp_mask;
+ memset(&icmp_mask, 0, sizeof(icmp_mask));
+ if ((proto == IPPROTO_ICMP) &&
+ (match->wc.masks.tp_src || match->wc.masks.tp_dst)) {
+ icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src);
+ icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst);
+
+ icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match->wc.masks.tp_src);
+ icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match->wc.masks.tp_dst);
+
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_ICMP, &icmp_spec, &icmp_mask);
+
+ /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match */
+ if (ipv4_next_proto_mask) {
+ *ipv4_next_proto_mask = 0;
+ }
+ }
+
+ struct rte_flow_item_tcp tcp_spec;
+ struct rte_flow_item_tcp tcp_mask;
+ memset(&tcp_mask, 0, sizeof(tcp_mask));
+ if ((proto == IPPROTO_TCP) &&
+ (match->wc.masks.tp_src ||
+ match->wc.masks.tp_dst ||
+ match->wc.masks.tcp_flags)) {
+ tcp_spec.hdr.src_port = match->flow.tp_src;
+ tcp_spec.hdr.dst_port = match->flow.tp_dst;
+ tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8;
+ tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff;
+
+ tcp_mask.hdr.src_port = match->wc.masks.tp_src;
+ tcp_mask.hdr.dst_port = match->wc.masks.tp_dst;
+ tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8;
+ tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff;
+
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_TCP, &tcp_spec, &tcp_mask);
+
+ /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match */
+ if (ipv4_next_proto_mask) {
+ *ipv4_next_proto_mask = 0;
+ }
+ }
+ ADD_FLOW_PATTERN(RTE_FLOW_ITEM_TYPE_END, NULL, NULL);
+
+ struct rte_flow_action_mark mark;
+ if (actions_len) {
+ mark.id = info->flow_mark;
+ ADD_FLOW_ACTION(RTE_FLOW_ACTION_TYPE_MARK, &mark);
+ } else {
+ ADD_FLOW_ACTION(RTE_FLOW_ACTION_TYPE_DROP, NULL);
+ VLOG_INFO("no action given; drop pkts in hardware\n");
+ }
+ ADD_FLOW_ACTION(RTE_FLOW_ACTION_TYPE_END, NULL);
+
+ flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
+ actions.actions, &error);
+ if (!flow) {
+ VLOG_ERR("rte flow creat error: %u : message : %s\n",
+ error.type, error.message);
+ goto err;
+ }
+ add_ufid_dpdk_flow_mapping(ufid, flow);
+ VLOG_INFO("installed flow %p by ufid "UUID_FMT"\n",
+ flow, UUID_ARGS((struct uuid *)ufid));
+
+ return 0;
+
+err:
+ return -1;
+}
+
+/*
+ * Validate the flow for later rte flow offload creation. If any
+ * unsupported flow fields are matched, return -1.
+ */
+static int
+netdev_dpdk_validate_flow(const struct match *match)
+{
+ struct match match_zero_wc;
+
+ /* Create a wc-zeroed version of flow */
+ match_init(&match_zero_wc, &match->flow, &match->wc);
+
+#define CHECK_NONZERO(addr, size) do { \
+ uint8_t *padr = (uint8_t *)(addr); \
+ int i; \
+ for (i = 0; i < (size); i++) { \
+ if (padr[i] != 0) { \
+ goto err; \
+ } \
+ } \
+} while (0)
+
+#define CHECK_NONZEROSIMPLE(var) do { \
CHECK_NONZERO ?
+ if ((var) != 0) { \
+ goto err; \
+ } \
+} while (0)
+
+ CHECK_NONZERO(&match_zero_wc.flow.tunnel, sizeof(match_zero_wc.flow.tunnel));
+ CHECK_NONZEROSIMPLE(match->wc.masks.metadata);
+ CHECK_NONZEROSIMPLE(match->wc.masks.skb_priority);
+ CHECK_NONZEROSIMPLE(match->wc.masks.pkt_mark);
+ CHECK_NONZEROSIMPLE(match->wc.masks.dp_hash);
+
+ /* recirc id must be zero */
+ CHECK_NONZEROSIMPLE(match_zero_wc.flow.recirc_id);
+
+ CHECK_NONZEROSIMPLE(match->wc.masks.ct_state);
+ CHECK_NONZEROSIMPLE(match->wc.masks.ct_zone);
+ CHECK_NONZEROSIMPLE(match->wc.masks.ct_mark);
+ CHECK_NONZEROSIMPLE(match->wc.masks.ct_label.u64.hi);
+ CHECK_NONZEROSIMPLE(match->wc.masks.ct_label.u64.lo);
+ CHECK_NONZEROSIMPLE(match->wc.masks.ct_nw_proto);
+ CHECK_NONZEROSIMPLE(match->wc.masks.ct_tp_src);
+ CHECK_NONZEROSIMPLE(match->wc.masks.ct_tp_dst);
+ CHECK_NONZEROSIMPLE(match->wc.masks.conj_id);
+ CHECK_NONZEROSIMPLE(match->wc.masks.actset_output);
+
+ /* unsupported L2 */
+ CHECK_NONZERO(&match->wc.masks.mpls_lse,
+ sizeof(match_zero_wc.flow.mpls_lse) / sizeof(ovs_be32));
+
+ /* unsupported L3 */
+ CHECK_NONZERO(&match->wc.masks.ipv6_src, sizeof(struct in6_addr));
+ CHECK_NONZERO(&match->wc.masks.ipv6_dst, sizeof(struct in6_addr));
+ CHECK_NONZEROSIMPLE(match->wc.masks.ipv6_label);
+ CHECK_NONZERO(&match->wc.masks.nd_target, sizeof(struct in6_addr));
+ CHECK_NONZERO(&match->wc.masks.arp_sha, sizeof(struct eth_addr));
+ CHECK_NONZERO(&match->wc.masks.arp_tha, sizeof(struct eth_addr));
CHECK_NONZERO_BYTES ?
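To spell out these naming suggestions (here and at the CHECK_NONZEROSIMPLE
definition above), a rough sketch — the byte-wise macro keeps the current
semantics and the scalar check can simply reuse it:

    #define CHECK_NONZERO_BYTES(addr, size) do {            \
        const uint8_t *bytes_ = (const uint8_t *)(addr);    \
        size_t i_;                                          \
        for (i_ = 0; i_ < (size); i_++) {                   \
            if (bytes_[i_] != 0) {                          \
                goto err;                                   \
            }                                               \
        }                                                   \
    } while (0)

    /* A scalar is nonzero exactly when one of its bytes is. */
    #define CHECK_NONZERO(var) CHECK_NONZERO_BYTES(&(var), sizeof (var))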
+
+ /* If fragmented, then don't HW accelerate - for now */
+ CHECK_NONZEROSIMPLE(match_zero_wc.flow.nw_frag);
+
+ /* unsupported L4 */
+ CHECK_NONZEROSIMPLE(match->wc.masks.igmp_group_ip4);
+
+ return 0;
+
+err:
+ VLOG_INFO("Cannot HW accelerate this flow");
+ return -1;
+}
+
+static int
+netdev_dpdk_destroy_rte_flow(struct netdev_dpdk *dev,
+ const ovs_u128 *ufid,
+ struct rte_flow *rte_flow)
+{
+ struct rte_flow_error error;
+ int ret;
+
+ ret = rte_flow_destroy(dev->port_id, rte_flow, &error);
+ if (ret == 0) {
+ del_ufid_dpdk_flow_mapping(ufid);
+ VLOG_INFO("removed rte flow %p associated with ufid " UUID_FMT "\n",
+ rte_flow, UUID_ARGS((struct uuid *)ufid));
+ } else {
+ VLOG_ERR("rte flow destroy error: %u : message : %s\n",
+ error.type, error.message);
+ }
+
+ return ret;
+}
+
+static int
+netdev_dpdk_flow_put(struct netdev *netdev, struct match *match,
+ struct nlattr *actions, size_t actions_len,
+ const ovs_u128 *ufid, struct offload_info *info,
+ struct dpif_flow_stats *stats OVS_UNUSED)
+{
+ struct rte_flow *rte_flow;
+ int ret;
+
+ /*
+ * If an old rte_flow exists, it means it's a flow modification.
+ * Destroy the old rte flow first before adding a new one.
+ */
+ rte_flow = get_rte_flow_by_ufid(ufid);
+ if (rte_flow) {
+ ret = netdev_dpdk_destroy_rte_flow(netdev_dpdk_cast(netdev),
+ ufid, rte_flow);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = netdev_dpdk_validate_flow(match);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return netdev_dpdk_add_rte_flow_offload(netdev, match, actions,
+ actions_len, ufid, info);
+}
+
Would you mind adding some comments for the NULL elements below?
+#define DPDK_FLOW_OFFLOAD_API \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL, \
+ netdev_dpdk_flow_put, \
+ NULL, \
+ NULL, \
+ NULL
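For reference, if these slots follow the NO_OFFLOAD_API ordering from
netdev-provider.h, I'd expect something like the following (please
double-check the order against netdev-provider.h before applying):

    #define DPDK_FLOW_OFFLOAD_API                         \
        NULL,                   /* flow_flush */          \
        NULL,                   /* flow_dump_create */    \
        NULL,                   /* flow_dump_destroy */   \
        NULL,                   /* flow_dump_next */      \
        netdev_dpdk_flow_put,   /* flow_put */            \
        NULL,                   /* flow_get */            \
        NULL,                   /* flow_del */            \
        NULL                    /* init_flow_api */

(flow_del presumably gets filled in by a later patch in this series.)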
+
+
#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, \
SET_CONFIG, SET_TX_MULTIQ, SEND, \
GET_CARRIER, GET_STATS, \
@@ -3470,7 +3853,7 @@ get_rte_flow_by_ufid(const ovs_u128 *ufid)
RXQ_RECV, \
NULL, /* rx_wait */ \
NULL, /* rxq_drain */ \
- NO_OFFLOAD_API \
+ DPDK_FLOW_OFFLOAD_API \
}
static const struct netdev_class dpdk_class =
--
2.7.4