[ovs-dev] [RFC 2/2] netdev-dpdk: Enable TSO when using multi-seg mbufs
Tiago Lam
tiago.lam at intel.com
Wed Aug 8 15:39:28 UTC 2018
TCP Segmentation Offload (TSO) is a feature which enables the TCP/IP
network stack to delegate segmentation of a TCP segment to the hardware
NIC, thus saving compute resources. This may improve performance
significantly for TCP workload in virtualized environments.
While a previous commit already added the necessary logic to netdev-dpdk
to deal with packets marked for TSO, this set of changes enables TSO by
default when using multi-segment mbufs.
Thus, to enable TSO on the physical DPDK interfaces, only the following
command needs to be issued before starting OvS:
ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true
Co-authored-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
Signed-off-by: Tiago Lam <tiago.lam at intel.com>
---
Documentation/topics/dpdk/phy.rst | 64 +++++++++++++++++++++++++++++++++++++++
lib/netdev-dpdk.c | 52 ++++++++++++++++++++++++++-----
2 files changed, 108 insertions(+), 8 deletions(-)
diff --git a/Documentation/topics/dpdk/phy.rst b/Documentation/topics/dpdk/phy.rst
index 1470623..980a629 100644
--- a/Documentation/topics/dpdk/phy.rst
+++ b/Documentation/topics/dpdk/phy.rst
@@ -248,3 +248,67 @@ Command to set interrupt mode for a specific interface::
Command to set polling mode for a specific interface::
$ ovs-vsctl set interface <iface_name> options:dpdk-lsc-interrupt=false
+
+TCP Segmentation Offload (TSO)
+------------------------------
+
+Overview
+~~~~~~~~
+
+TCP Segmentation Offload (TSO) enables a network stack to delegate
+segmentation of an oversized TCP segment to the underlying physical NIC.
+Offload of frame segmentation achieves computational savings in the core,
+freeing up CPU cycles for more useful work.
+
+DPDK v16.07 added support for `TSO` in the vHost user backend; as such, a
+guest's virtual network interfaces may avail of `TSO`. In such a setup, the
+aforementioned computational savings are made in the core acting as the VM's
+virtual CPU, typically resulting in improved TCP throughput.
+
+To enable TSO in a guest, the underlying NIC must first support `TSO` -
+consult your controller's datasheet for compatibility. Secondly, the NIC
+must have an associated DPDK Poll Mode Driver (PMD) which supports `TSO`.
+
+Enabling TSO
+~~~~~~~~~~~~
+
+TSO may be enabled in one of two ways, as follows:
+
+ 1. QEMU Command Line Parameter:
+
+ ```
+ sudo $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 \
+ ...
+ -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1,\
+ mrg_rxbuf=on,csum=on,gso=on,guest_csum=on,guest_tso4=on,\
+ guest_tso6=on,guest_ecn=on \
+ ...
+ ```
+
+ 2. ethtool
+
+`TSO` is enabled in OvS by the DPDK vHost User backend; when a new guest
+connection is established, `TSO` is advertised to the guest as an available
+feature. Assuming that the guest's OS also supports `TSO`, ethtool can be used
+to enable the same:
+
+ ```
+ ethtool -K eth0 sg on # scatter-gather is a prerequisite for TSO
+ ethtool -K eth0 tso on
+ ethtool -k eth0 # verify that TSO is reported as 'on'
+ ```
+
+ <b>Note:</b> In both methods, `mergeable buffers` are required:
+ ```
+ sudo $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 \
+ ...
+ mrg_rxbuf=on,\
+ ...
+ ```
+
+Limitations
+~~~~~~~~~~~
+
+The current OvS `TSO` implementation supports flat and VLAN networks only
+(i.e. no support for `TSO` over tunneled connections [VxLAN, GRE, IPinIP,
+etc.]).
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 5da5996..20d4fd5 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -379,6 +379,7 @@ struct ingress_policer {
enum dpdk_hw_ol_features {
NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
NETDEV_RX_HW_CRC_STRIP = 1 << 1,
+ NETDEV_TX_TSO_OFFLOAD = 1 << 2,
};
/*
@@ -1003,6 +1004,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
struct rte_eth_dev_info info;
uint16_t conf_mtu;
+ rte_eth_dev_info_get(dev->port_id, &info);
+
/* As of DPDK 17.11.1 a few PMDs require to explicitly enable
* scatter to support jumbo RX. Checking the offload capabilities
* is not an option as PMDs are not required yet to report
@@ -1010,7 +1013,6 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
* (testing or code review). Listing all such PMDs feels harder
* than highlighting the one known not to need scatter */
if (dev->mtu > ETHER_MTU) {
- rte_eth_dev_info_get(dev->port_id, &info);
if (strncmp(info.driver_name, "net_nfp", 7)) {
conf.rxmode.enable_scatter = 1;
}
@@ -1018,14 +1020,28 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
/* Multi-segment-mbuf-specific setup. */
if (dpdk_multi_segment_mbufs) {
- /* DPDK PMDs typically attempt to use simple or vectorized
- * transmit functions, neither of which are compatible with
- * multi-segment mbufs. Ensure that these are disabled when
- * multi-segment mbufs are enabled.
- */
- rte_eth_dev_info_get(dev->port_id, &info);
+ if (info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS) {
+ /* DPDK PMDs typically attempt to use simple or vectorized
+ * transmit functions, neither of which are compatible with
+ * multi-segment mbufs. Ensure that these are disabled when
+ * multi-segment mbufs are enabled.
+ */
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
+ }
+
+ if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
+ }
+
txconf = info.default_txconf;
- txconf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
+ txconf.txq_flags = ETH_TXQ_FLAGS_IGNORE;
+ txconf.offloads = conf.txmode.offloads;
+ } else if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
+ dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
+ VLOG_WARN("Failed to set Tx TSO offload in %s. Requires option "
+ "`dpdk-multi-seg-mbufs` to be enabled.", dev->up.name);
}
conf.intr_conf.lsc = dev->lsc_interrupt_mode;
@@ -1135,6 +1151,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM |
DEV_RX_OFFLOAD_IPV4_CKSUM;
+ uint32_t tx_tso_offload_capa = DEV_TX_OFFLOAD_TCP_TSO |
+ DEV_TX_OFFLOAD_TCP_CKSUM |
+ DEV_TX_OFFLOAD_IPV4_CKSUM;
rte_eth_dev_info_get(dev->port_id, &info);
@@ -1154,6 +1173,18 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
dev->hw_ol_features |= NETDEV_RX_CHECKSUM_OFFLOAD;
}
+ if (dpdk_multi_segment_mbufs) {
+ if (info.tx_offload_capa & tx_tso_offload_capa) {
+ dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
+ VLOG_WARN("Tx TSO offload is not supported on port "
+ DPDK_PORT_ID_FMT, dev->port_id);
+ }
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
+ }
+
n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
@@ -1673,6 +1704,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
} else {
smap_add(args, "rx_csum_offload", "false");
}
+ if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
+ smap_add(args, "tx_tso_offload", "true");
+ } else {
+ smap_add(args, "tx_tso_offload", "false");
+ }
smap_add(args, "lsc_interrupt_mode",
dev->lsc_interrupt_mode ? "true" : "false");
}
--
2.7.4
More information about the dev
mailing list