[ovs-dev] [RFC v2 2/4] netdev-dpdk: Apply ingress_sched config to dpdk phy ports
Billy O'Mahony
billy.o.mahony at intel.com
Tue Jul 11 16:58:31 UTC 2017
Ingress scheduling configuration is given effect by way of Flow Director
filters. A small subset of the possible ingress scheduling possible is
implemented in this patch.
Signed-off-by: Billy O'Mahony <billy.o.mahony at intel.com>
---
include/openvswitch/ofp-parse.h | 3 ++
lib/dpif-netdev.c | 1 +
lib/netdev-dpdk.c | 117 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 121 insertions(+)
diff --git a/include/openvswitch/ofp-parse.h b/include/openvswitch/ofp-parse.h
index fc5784e..08d6086 100644
--- a/include/openvswitch/ofp-parse.h
+++ b/include/openvswitch/ofp-parse.h
@@ -37,6 +37,9 @@ struct ofputil_table_mod;
struct ofputil_bundle_msg;
struct ofputil_tlv_table_mod;
struct simap;
+struct tun_table;
+struct flow_wildcards;
+struct ofputil_port_map;
enum ofputil_protocol;
char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2f224db..66712c7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -44,6 +44,7 @@
#include "dp-packet.h"
#include "dpif.h"
#include "dpif-provider.h"
+#include "netdev-provider.h"
#include "dummy.h"
#include "fat-rwlock.h"
#include "flow.h"
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index d14c381..93556e7 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -33,6 +33,8 @@
#include <rte_meter.h>
#include <rte_virtio_net.h>
+#include <openvswitch/ofp-parse.h>
+#include <openvswitch/ofp-util.h>
#include "dirs.h"
#include "dp-packet.h"
#include "dpdk.h"
@@ -168,6 +170,10 @@ static const struct rte_eth_conf port_conf = {
.txmode = {
.mq_mode = ETH_MQ_TX_NONE,
},
+ .fdir_conf = {
+ .mode = RTE_FDIR_MODE_PERFECT,
+ },
+
};
enum { DPDK_RING_SIZE = 256 };
@@ -652,6 +658,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
int i;
struct rte_eth_conf conf = port_conf;
+ /* Ingress scheduling requires ETH_MQ_RX_NONE so limit it to when exactly
+ * two rxqs are defined. Otherwise MQ will not work as expected. */
+ if (dev->ingress_sched_str && n_rxq == 2) {
+ conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+ }
+ else {
+ conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+ }
+
if (dev->mtu > ETHER_MTU) {
conf.rxmode.jumbo_frame = 1;
conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
@@ -752,6 +767,106 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex)
}
}
+static void
+dpdk_apply_ingress_scheduling(struct netdev_dpdk *dev, int n_rxq)
+{
+ if (!dev->ingress_sched_str) {
+ return;
+ }
+
+ if (n_rxq != 2) {
+ VLOG_ERR("Interface %s: Ingress scheduling config ignored; " \
+ "Requires n_rxq==2.", dev->up.name);
+ }
+
+ int priority_q_id = n_rxq-1;
+ char *key, *val, *str, *iter;
+
+ ovs_be32 ip_src, ip_dst;
+ ip_src = ip_dst = 0;
+
+ uint16_t eth_type, port_src, port_dst;
+ eth_type = port_src = port_dst = 0;
+ uint8_t ip_proto = 0;
+
+ char *mallocd_str; /* str_to_x returns malloc'd str we'll need to free */
+ /* Parse the configuration into local vars */
+ iter = str = xstrdup(dev->ingress_sched_str);
+ while (ofputil_parse_key_value (&iter, &key, &val)) {
+ if (strcmp(key, "nw_src") == 0 || strcmp(key, "ip_src") == 0) {
+ mallocd_str = str_to_ip(val, &ip_src);
+ } else if (strcmp(key, "nw_dst") == 0 || strcmp(key, "ip_dst") == 0) {
+ mallocd_str = str_to_ip(val, &ip_dst);
+ } else if (strcmp(key, "dl_type") == 0 ||
+ strcmp(key, "eth_type") == 0) {
+ mallocd_str = str_to_u16(val, "eth_type/dl_type", ð_type);
+ } else if (strcmp(key, "tcp_src") == 0 ||
+ strcmp(key, "tp_src") == 0 ||
+ strcmp(key, "udp_src") == 0) {
+ mallocd_str = str_to_u16(val, "tcp/udp_src", &port_src);
+ } else if (strcmp(key, "tcp_dst") == 0 ||
+ strcmp(key, "tp_dst") == 0 ||
+ strcmp(key, "udp_dst") == 0) {
+ mallocd_str = str_to_u16(val, "tcp/udp_dst", &port_dst);
+ } else if (strcmp(key, "ip") == 0) {
+ eth_type = 0x0800;
+ } else if (strcmp(key, "udp") == 0) {
+ eth_type = 0x0800;
+ ip_proto = 17;
+ } else if (strcmp(key, "tcp") == 0) {
+ eth_type = 0x0800;
+ ip_proto = 6;
+ } else {
+ VLOG_WARN("Ignoring unsupported ingress scheduling field '%s'", \
+ key);
+ }
+ if (mallocd_str) {
+ VLOG_ERR ("%s", mallocd_str);
+ free(mallocd_str);
+ mallocd_str = NULL;
+ }
+ }
+ free (str);
+
+ /* Set the filters */
+ int diag = 0;
+ if (eth_type && ip_src && ip_dst && port_src && port_dst && ip_proto) {
+ struct rte_eth_fdir_filter entry = { 0 };
+ entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
+ entry.input.flow.udp4_flow.ip.src_ip = ip_src;
+ entry.input.flow.udp4_flow.ip.dst_ip = ip_dst;
+ entry.input.flow.udp4_flow.src_port = htons(port_src);
+ entry.input.flow.udp4_flow.dst_port = htons(port_dst);
+ entry.action.rx_queue = priority_q_id;
+ entry.action.behavior = RTE_ETH_FDIR_ACCEPT;
+ entry.action.report_status = RTE_ETH_FDIR_REPORT_ID;
+ diag = rte_eth_dev_filter_ctrl(dev->port_id,
+ RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry);
+ }
+ else if (eth_type && !ip_src && !ip_dst && !port_src
+ && !port_dst && !ip_proto) {
+ struct rte_eth_ethertype_filter entry = {0};
+ memset (&entry, 0, sizeof entry);
+ entry.ether_type = eth_type;
+ entry.flags = 0;
+ entry.queue = priority_q_id;
+ diag = rte_eth_dev_filter_ctrl(dev->port_id,
+ RTE_ETH_FILTER_ETHERTYPE, RTE_ETH_FILTER_ADD, &entry);
+ }
+ else {
+ VLOG_ERR("Unsupported ingress scheduling match-field combination.");
+ return;
+ }
+
+ if (diag) {
+ VLOG_ERR("Failed to add ingress scheduling filter.");
+ }
+ else {
+ /* Mark the appropriate q as prioritized */
+ dev->up.priority_rxq = priority_q_id;
+ }
+}
+
static int
dpdk_eth_dev_init(struct netdev_dpdk *dev)
OVS_REQUIRES(dev->mutex)
@@ -774,6 +889,8 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
return -diag;
}
+ dpdk_apply_ingress_scheduling(dev, n_rxq);
+
diag = rte_eth_dev_start(dev->port_id);
if (diag) {
VLOG_ERR("Interface %s start error: %s", dev->up.name,
--
2.7.4
More information about the dev
mailing list