[ovs-dev] [RFC v2 2/4] netdev-dpdk: Apply ingress_sched config to dpdk phy ports

Billy O'Mahony billy.o.mahony at intel.com
Tue Jul 11 16:58:31 UTC 2017


Ingress scheduling configuration is given effect by way of Flow Director
filters. A small subset of the possible ingress scheduling possible is
implemented in this patch.

Signed-off-by: Billy O'Mahony <billy.o.mahony at intel.com>
---
 include/openvswitch/ofp-parse.h |   3 ++
 lib/dpif-netdev.c               |   1 +
 lib/netdev-dpdk.c               | 117 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+)

diff --git a/include/openvswitch/ofp-parse.h b/include/openvswitch/ofp-parse.h
index fc5784e..08d6086 100644
--- a/include/openvswitch/ofp-parse.h
+++ b/include/openvswitch/ofp-parse.h
@@ -37,6 +37,9 @@ struct ofputil_table_mod;
 struct ofputil_bundle_msg;
 struct ofputil_tlv_table_mod;
 struct simap;
+struct tun_table;
+struct flow_wildcards;
+struct ofputil_port_map;
 enum ofputil_protocol;
 
 char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2f224db..66712c7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -44,6 +44,7 @@
 #include "dp-packet.h"
 #include "dpif.h"
 #include "dpif-provider.h"
+#include "netdev-provider.h"
 #include "dummy.h"
 #include "fat-rwlock.h"
 #include "flow.h"
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index d14c381..93556e7 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -33,6 +33,8 @@
 #include <rte_meter.h>
 #include <rte_virtio_net.h>
 
+#include <openvswitch/ofp-parse.h>
+#include <openvswitch/ofp-util.h>
 #include "dirs.h"
 #include "dp-packet.h"
 #include "dpdk.h"
@@ -168,6 +170,10 @@ static const struct rte_eth_conf port_conf = {
     .txmode = {
         .mq_mode = ETH_MQ_TX_NONE,
     },
+    .fdir_conf = {
+        .mode = RTE_FDIR_MODE_PERFECT,
+    },
+
 };
 
 enum { DPDK_RING_SIZE = 256 };
@@ -652,6 +658,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
     int i;
     struct rte_eth_conf conf = port_conf;
 
+    /* Ingress scheduling requires ETH_MQ_RX_NONE so limit it to when exactly
+     * two rxqs are defined. Otherwise MQ will not work as expected. */
+    if (dev->ingress_sched_str && n_rxq == 2) {
+        conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+    }
+    else {
+        conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+    }
+
     if (dev->mtu > ETHER_MTU) {
         conf.rxmode.jumbo_frame = 1;
         conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
@@ -752,6 +767,106 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex)
     }
 }
 
+static void
+dpdk_apply_ingress_scheduling(struct netdev_dpdk *dev, int n_rxq)
+{
+    if (!dev->ingress_sched_str) {
+        return;
+    }
+
+    if (n_rxq != 2) {
+        VLOG_ERR("Interface %s: Ingress scheduling config ignored; " \
+                 "Requires n_rxq==2.", dev->up.name);
+    }
+
+    int priority_q_id = n_rxq-1;
+    char *key, *val, *str, *iter;
+
+    ovs_be32 ip_src, ip_dst;
+    ip_src = ip_dst = 0;
+
+    uint16_t eth_type, port_src, port_dst;
+    eth_type = port_src = port_dst = 0;
+    uint8_t ip_proto = 0;
+
+    char *mallocd_str; /* str_to_x returns malloc'd str we'll need to free */
+    /* Parse the configuration into local vars */
+    iter = str = xstrdup(dev->ingress_sched_str);
+    while (ofputil_parse_key_value (&iter, &key, &val)) {
+        if (strcmp(key, "nw_src") == 0 || strcmp(key, "ip_src") == 0) {
+            mallocd_str = str_to_ip(val, &ip_src);
+        } else if (strcmp(key, "nw_dst") == 0 || strcmp(key, "ip_dst") == 0) {
+            mallocd_str = str_to_ip(val, &ip_dst);
+        } else if (strcmp(key, "dl_type") == 0 ||
+                   strcmp(key, "eth_type") == 0) {
+            mallocd_str = str_to_u16(val, "eth_type/dl_type", &eth_type);
+        } else if (strcmp(key, "tcp_src") == 0 ||
+                  strcmp(key, "tp_src") == 0 ||
+                  strcmp(key, "udp_src") == 0) {
+            mallocd_str = str_to_u16(val, "tcp/udp_src", &port_src);
+        } else if (strcmp(key, "tcp_dst") == 0 ||
+                   strcmp(key, "tp_dst") == 0 ||
+                   strcmp(key, "udp_dst") == 0) {
+            mallocd_str = str_to_u16(val, "tcp/udp_dst", &port_dst);
+        } else if (strcmp(key, "ip") == 0) {
+            eth_type = 0x0800;
+        } else if (strcmp(key, "udp") == 0) {
+            eth_type = 0x0800;
+            ip_proto = 17;
+        } else if (strcmp(key, "tcp") == 0) {
+            eth_type = 0x0800;
+            ip_proto = 6;
+        } else {
+            VLOG_WARN("Ignoring unsupported ingress scheduling field '%s'", \
+                      key);
+        }
+        if (mallocd_str) {
+            VLOG_ERR ("%s", mallocd_str);
+            free(mallocd_str);
+            mallocd_str = NULL;
+        }
+    }
+    free (str);
+
+    /* Set the filters */
+    int diag = 0;
+    if (eth_type && ip_src && ip_dst && port_src && port_dst && ip_proto) {
+        struct rte_eth_fdir_filter entry = { 0 };
+        entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
+        entry.input.flow.udp4_flow.ip.src_ip = ip_src;
+        entry.input.flow.udp4_flow.ip.dst_ip = ip_dst;
+        entry.input.flow.udp4_flow.src_port = htons(port_src);
+        entry.input.flow.udp4_flow.dst_port = htons(port_dst);
+        entry.action.rx_queue = priority_q_id;
+        entry.action.behavior = RTE_ETH_FDIR_ACCEPT;
+        entry.action.report_status = RTE_ETH_FDIR_REPORT_ID;
+        diag = rte_eth_dev_filter_ctrl(dev->port_id,
+                RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry);
+    }
+    else if (eth_type && !ip_src && !ip_dst && !port_src
+             && !port_dst && !ip_proto) {
+        struct rte_eth_ethertype_filter entry = {0};
+        memset (&entry, 0, sizeof entry);
+        entry.ether_type = eth_type;
+        entry.flags = 0;
+        entry.queue = priority_q_id;
+        diag = rte_eth_dev_filter_ctrl(dev->port_id,
+                RTE_ETH_FILTER_ETHERTYPE, RTE_ETH_FILTER_ADD, &entry);
+    }
+    else {
+        VLOG_ERR("Unsupported ingress scheduling match-field combination.");
+        return;
+    }
+
+    if (diag) {
+        VLOG_ERR("Failed to add ingress scheduling filter.");
+    }
+    else {
+        /* Mark the appropriate q as prioritized */
+        dev->up.priority_rxq = priority_q_id;
+    }
+}
+
 static int
 dpdk_eth_dev_init(struct netdev_dpdk *dev)
     OVS_REQUIRES(dev->mutex)
@@ -774,6 +889,8 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
         return -diag;
     }
 
+    dpdk_apply_ingress_scheduling(dev, n_rxq);
+
     diag = rte_eth_dev_start(dev->port_id);
     if (diag) {
         VLOG_ERR("Interface %s start error: %s", dev->up.name,
-- 
2.7.4



More information about the dev mailing list