[ovs-dev] [PATCH 2/3] vhost TSO enabled.
Flavio Leitner
fbl at sysclose.org
Mon Sep 23 14:15:09 UTC 2019
Signed-off-by: Flavio Leitner <fbl at sysclose.org>
---
lib/dp-packet.h | 16 ++++++++
lib/netdev-dpdk.c | 93 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 103 insertions(+), 6 deletions(-)
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 14f0897fa..2b12604b9 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -109,6 +109,8 @@ static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
+static inline bool dp_packet_is_tso(struct dp_packet *b);
+
void *dp_packet_resize_l2(struct dp_packet *, int increment);
void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
static inline void *dp_packet_eth(const struct dp_packet *);
@@ -514,6 +516,14 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
b->mbuf.buf_len = s;
}
+static inline bool
+dp_packet_is_tso(struct dp_packet *b)
+{
+ return (b->mbuf.ol_flags & (PKT_TX_TCP_SEG | PKT_TX_L4_MASK))
+ ? true
+ : false;
+}
+
/* Returns the RSS hash of the packet 'p'. Note that the returned value is
* correct only if 'dp_packet_rss_valid(p)' returns true */
static inline uint32_t
@@ -643,6 +653,12 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
b->allocated_ = s;
}
+static inline bool
+dp_packet_is_tso(struct dp_packet *b OVS_UNUSED)
+{
+ return false;
+}
+
/* Returns the RSS hash of the packet 'p'. Note that the returned value is
* correct only if 'dp_packet_rss_valid(p)' returns true */
static inline uint32_t
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 7eb294366..cfbd9a9e5 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -371,7 +371,8 @@ struct ingress_policer {
enum dpdk_hw_ol_features {
NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
NETDEV_RX_HW_CRC_STRIP = 1 << 1,
- NETDEV_RX_HW_SCATTER = 1 << 2
+ NETDEV_RX_HW_SCATTER = 1 << 2,
+ NETDEV_TX_TSO_OFFLOAD = 1 << 3,
};
/*
@@ -640,7 +641,7 @@ dpdk_calculate_mbufs(struct netdev_dpdk *dev, int mtu, bool per_port_mp)
return n_mbufs;
}
-static struct dpdk_mp *
+static struct rte_mempool *
dpdk_mp64k_create(uint32_t socket_id)
{
struct rte_mempool *mp;
@@ -990,6 +991,12 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
conf.rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
}
+ if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
+ }
+
/* Limit configured rss hash functions to only those supported
* by the eth device. */
conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
@@ -1091,6 +1098,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM |
DEV_RX_OFFLOAD_IPV4_CKSUM;
+ uint32_t tx_tso_offload_capa = DEV_TX_OFFLOAD_TCP_TSO |
+ DEV_TX_OFFLOAD_TCP_CKSUM |
+ DEV_TX_OFFLOAD_IPV4_CKSUM;
rte_eth_dev_info_get(dev->port_id, &info);
@@ -1117,6 +1127,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
}
+ if (info.tx_offload_capa & tx_tso_offload_capa) {
+ dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+ } else {
+ dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
+ VLOG_WARN("Tx TSO offload is not supported on port "
+ DPDK_PORT_ID_FMT, dev->port_id);
+ }
+
n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
@@ -1361,6 +1379,7 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
goto out;
}
+#if 0
err = rte_vhost_driver_disable_features(dev->vhost_id,
1ULL << VIRTIO_NET_F_HOST_TSO4
| 1ULL << VIRTIO_NET_F_HOST_TSO6
@@ -1370,6 +1389,7 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
"port: %s\n", name);
goto out;
}
+#endif
err = rte_vhost_driver_start(dev->vhost_id);
if (err) {
@@ -1702,6 +1722,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
} else {
smap_add(args, "rx_csum_offload", "false");
}
+ if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
+ smap_add(args, "tx_tso_offload", "true");
+ } else {
+ smap_add(args, "tx_tso_offload", "false");
+ }
smap_add(args, "lsc_interrupt_mode",
dev->lsc_interrupt_mode ? "true" : "false");
}
@@ -2081,6 +2106,41 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
rte_free(rx);
}
+/* Fills in the mbuf's l2_len/l3_len/l4_len offload metadata from the
+ * packet's parsed headers.  If PKT_TX_TCP_SEG is set in ol_flags, also
+ * sets the PKT_TX_TCP_CKSUM and PKT_TX_IP_CKSUM flags and computes
+ * tso_segsz from the device MTU. */
+static void
+netdev_dpdk_prep_tso_packet(struct rte_mbuf *mbuf, int mtu)
+{
+ struct dp_packet *pkt;
+ struct tcp_header *th;
+
+ pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
+ mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
+ mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+ th = dp_packet_l4(pkt);
+ /* There's no layer 4 in the packet. */
+ if (!th) {
+ return;
+ }
+ mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
+ mbuf->outer_l2_len = 0;
+ mbuf->outer_l3_len = 0;
+
+ if (!(mbuf->ol_flags & PKT_TX_TCP_SEG)) {
+ return;
+ }
+
+ /* Prepare packet for egress. */
+ mbuf->ol_flags |= PKT_TX_TCP_SEG;
+ mbuf->ol_flags |= PKT_TX_TCP_CKSUM;
+ mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+
+ /* Set the size of each TCP segment, based on the MTU of the device. */
+ mbuf->tso_segsz = mtu - mbuf->l3_len - mbuf->l4_len;
+}
+
/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'. Takes ownership of
* 'pkts', even in case of failure.
*
@@ -2374,13 +2434,29 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
int cnt = 0;
struct rte_mbuf *pkt;
+ /* Filter oversized packets, unless they are marked for TSO. */
for (i = 0; i < pkt_cnt; i++) {
pkt = pkts[i];
if (OVS_UNLIKELY(pkt->pkt_len > dev->max_packet_len)) {
- VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " max_packet_len %d",
- dev->up.name, pkt->pkt_len, dev->max_packet_len);
- rte_pktmbuf_free(pkt);
- continue;
+ if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
+ VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " "
+ "max_packet_len %d",
+ dev->up.name, pkt->pkt_len, dev->max_packet_len);
+ rte_pktmbuf_free(pkt);
+ continue;
+ } else {
+ /* Prepare the 'pkt' for egress through a physical NIC.
+ * Packets bound for a vhost port do not traverse a NIC but
+ * travel between VMs on the same host, so no preparation
+ * is needed in that case. */
+ if (dev->type != DPDK_DEV_VHOST) {
+ netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+ }
+ }
+ } else {
+ if (dev->type != DPDK_DEV_VHOST) {
+ netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+ }
+
}
if (OVS_UNLIKELY(i != cnt)) {
@@ -4277,6 +4353,9 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
dev->tx_q[0].map = 0;
}
+ dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+ VLOG_DBG("%s: TSO enabled on vhost port", dev->up.name);
+
netdev_dpdk_remap_txqs(dev);
err = netdev_dpdk_mempool_configure(dev);
@@ -4370,6 +4449,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
goto unlock;
}
+#if 0
err = rte_vhost_driver_disable_features(dev->vhost_id,
1ULL << VIRTIO_NET_F_HOST_TSO4
| 1ULL << VIRTIO_NET_F_HOST_TSO6
@@ -4379,6 +4459,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
"client port: %s\n", dev->up.name);
goto unlock;
}
+#endif
err = rte_vhost_driver_start(dev->vhost_id);
if (err) {
--
2.20.1
More information about the dev
mailing list