[ovs-dev] [PATCH v5 2/7] netdev-dpdk: Consider packets marked for TSO.

Michal Obrembski michalx.obrembski at intel.com
Wed Sep 11 14:10:00 UTC 2019


From: Tiago Lam <tiago.lam at intel.com>

Previously, TSO was being explicitly disabled on vhost interfaces,
meaning the guests wouldn't have TSO support negotiated in. With TSO
negotiated and enabled, packets are now marked for TSO, through the
PKT_TX_TCP_SEG flag.

In order to deal with this type of packets, a new function,
netdev_dpdk_prep_tso_packet(), has been introduced, with the main
purpose of setting correctly the l2, l3 and l4 length members of the
mbuf struct, and the appropriate ol_flags. This function supports TSO
both in IPv4 and IPv6.

netdev_dpdk_prep_tso_packet() is then only called when packets are
marked with the PKT_TX_TCP_SEG flag, meaning they have been marked for
TSO, and when the packet will be traversing the NIC.

Additionally, if a packet is marked for TSO but the egress netdev
doesn't support it, the packet is dropped.

Co-authored-by: Mark Kavanagh <mark.b.kavanagh at intel.com>

Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
Signed-off-by: Tiago Lam <tiago.lam at intel.com>
---
 lib/dp-packet.h    |  16 +++++++
 lib/netdev-bsd.c   |  11 ++++-
 lib/netdev-dpdk.c  | 122 ++++++++++++++++++++++++++++++++++++++++++-----------
 lib/netdev-dummy.c |  11 ++++-
 lib/netdev-linux.c |  15 +++++++
 5 files changed, 149 insertions(+), 26 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index f091265..96136ed 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -122,6 +122,8 @@ static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
 static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
 static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
 
+static inline bool dp_packet_is_tso(struct dp_packet *b);
+
 void *dp_packet_resize_l2(struct dp_packet *, int increment);
 void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
 static inline void *dp_packet_eth(const struct dp_packet *);
@@ -797,6 +799,14 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
     b->mbuf.buf_len = s;
 }
 
+static inline bool
+dp_packet_is_tso(struct dp_packet *b)
+{
+    return (b->mbuf.ol_flags & (PKT_TX_TCP_SEG | PKT_TX_L4_MASK))
+           ? true
+           : false;
+}
+
 static inline void
 dp_packet_copy_mbuf_flags(struct dp_packet *dst, const struct dp_packet *src)
 {
@@ -1007,6 +1017,12 @@ dp_packet_get_allocated(const struct dp_packet *b)
     return b->allocated_;
 }
 
+static inline bool
+dp_packet_is_tso(struct dp_packet *b OVS_UNUSED)
+{
+    return false;
+}
+
 static inline void
 dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
 {
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 6224dab..a1172a5 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -700,11 +700,20 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
     }
 
     DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        size_t size = dp_packet_size(packet);
+
+        /* TSO not supported in BSD netdev */
+        if (dp_packet_is_tso(packet)) {
+            VLOG_WARN_RL(&rl, "%s: No TSO support on port, TSO packet of size "
+                         "%" PRIuSIZE " dropped", name, size);
+
+            continue;
+        }
+
         /* We need the whole data to send the packet on the device */
         dp_packet_linearize(packet);
 
         const void *data = dp_packet_data(packet);
-        size_t size = dp_packet_size(packet);
 
         while (!error) {
             ssize_t retval;
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 6797081..2304f28 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1384,14 +1384,16 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
         goto out;
     }
 
-    err = rte_vhost_driver_disable_features(dev->vhost_id,
-                                1ULL << VIRTIO_NET_F_HOST_TSO4
-                                | 1ULL << VIRTIO_NET_F_HOST_TSO6
-                                | 1ULL << VIRTIO_NET_F_CSUM);
-    if (err) {
-        VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
-                 "port: %s\n", name);
-        goto out;
+    if (!dpdk_multi_segment_mbufs) {
+        err = rte_vhost_driver_disable_features(dev->vhost_id,
+                                    1ULL << VIRTIO_NET_F_HOST_TSO4
+                                    | 1ULL << VIRTIO_NET_F_HOST_TSO6
+                                    | 1ULL << VIRTIO_NET_F_CSUM);
+        if (err) {
+            VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
+                     "client port: %s\n", dev->up.name);
+            goto out;
+        }
     }
 
     err = rte_vhost_driver_start(dev->vhost_id);
@@ -2104,6 +2106,44 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
     rte_free(rx);
 }
 
+/* Should only be called if PKT_TX_TCP_SEG is set in ol_flags.
+ * Furthermore, it also sets the PKT_TX_TCP_CKSUM and PKT_TX_IP_CKSUM flags,
+ * and PKT_TX_IPV4 and PKT_TX_IPV6 in case the packet is IPv4 or IPv6,
+ * respectively. */
+static void
+netdev_dpdk_prep_tso_packet(struct rte_mbuf *mbuf, int mtu)
+{
+    struct dp_packet *pkt;
+    struct tcp_header *th;
+
+    pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
+    mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
+    mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+    th = dp_packet_l4(pkt);
+    /* There's no layer 4 in the packet. */
+    if (!th) {
+        return;
+    }
+    mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
+    mbuf->outer_l2_len = 0;
+    mbuf->outer_l3_len = 0;
+
+    /* Reset packet RX RSS flag to reuse in egress. */
+    dp_packet_mbuf_rss_flag_reset(pkt);
+
+    if (!(mbuf->ol_flags & PKT_TX_TCP_SEG)) {
+        return;
+    }
+
+    /* Prepare packet for egress. */
+    mbuf->ol_flags |= PKT_TX_TCP_SEG;
+    mbuf->ol_flags |= PKT_TX_TCP_CKSUM;
+    mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+
+    /* Set the size of each TCP segment, based on the MTU of the device. */
+    mbuf->tso_segsz = mtu - mbuf->l3_len - mbuf->l4_len;
+}
+
 /* Tries to transmit 'pkts' to txq 'qid' of device 'dev'.  Takes ownership of
  * 'pkts', even in case of failure.
  * In case multi-segment mbufs / TSO is being used, it also prepares. In such
@@ -2413,13 +2453,29 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
     int cnt = 0;
     struct rte_mbuf *pkt;
 
+    /* Filter oversized packets, unless they are marked for TSO. */
     for (i = 0; i < pkt_cnt; i++) {
         pkt = pkts[i];
+
         if (OVS_UNLIKELY(pkt->pkt_len > dev->max_packet_len)) {
-            VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " max_packet_len %d",
-                         dev->up.name, pkt->pkt_len, dev->max_packet_len);
-            rte_pktmbuf_free(pkt);
-            continue;
+            if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
+                VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " "
+                             "max_packet_len %d",
+                             dev->up.name, pkt->pkt_len, dev->max_packet_len);
+                rte_pktmbuf_free(pkt);
+                continue;
+            } else {
+                /* 'If' the 'pkt' is intended for a VM, prepare it for sending,
+                 * 'else' the 'pkt' will not actually traverse the NIC, but
+                 * rather travel between VMs on the same host. */
+                if (dev->type != DPDK_DEV_VHOST) {
+                    netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+                }
+            }
+        } else {
+            if (dev->type != DPDK_DEV_VHOST) {
+                netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+            }
         }
 
         if (OVS_UNLIKELY(i != cnt)) {
@@ -2519,6 +2575,7 @@ dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
                             struct rte_mempool *mp)
 {
     struct rte_mbuf *mbuf, *fmbuf;
+    struct dp_packet *pkt = NULL;
     uint16_t max_data_len;
     uint32_t nb_segs = 0;
     uint32_t size = 0;
@@ -2559,6 +2616,12 @@ dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
     fmbuf->nb_segs = nb_segs;
     fmbuf->pkt_len = size;
 
+    pkt = CONTAINER_OF(fmbuf, struct dp_packet, mbuf);
+    pkt->l2_pad_size = packet->l2_pad_size;
+    pkt->l2_5_ofs = packet->l2_5_ofs;
+    pkt->l3_ofs = packet->l3_ofs;
+    pkt->l4_ofs = packet->l4_ofs;
+
     dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
 
     return 0;
@@ -2593,14 +2656,17 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 
     for (i = 0; i < cnt; i++) {
         struct dp_packet *packet = batch->packets[i];
+        struct rte_mbuf *pkt = &batch->packets[i]->mbuf;
         uint32_t size = dp_packet_size(packet);
         int err = 0;
 
         if (OVS_UNLIKELY(size > dev->max_packet_len)) {
-            VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
-                         size, dev->max_packet_len);
-            dropped++;
-            continue;
+            if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
+                VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
+                             size, dev->max_packet_len);
+                dropped++;
+                continue;
+            }
         }
 
         err = dpdk_copy_dp_packet_to_mbuf(packet, &pkts[txcnt],
@@ -2616,6 +2682,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
         }
         dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
 
+        if (dev->type != DPDK_DEV_VHOST) {
+            /* If packet is non-DPDK, at the very least, we need to update the
+             * mbuf length members, even if TSO is not to be performed. */
+            netdev_dpdk_prep_tso_packet(pkts[txcnt], dev->mtu);
+        }
+
         txcnt++;
     }
 
@@ -4466,14 +4538,16 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
             goto unlock;
         }
 
-        err = rte_vhost_driver_disable_features(dev->vhost_id,
-                                    1ULL << VIRTIO_NET_F_HOST_TSO4
-                                    | 1ULL << VIRTIO_NET_F_HOST_TSO6
-                                    | 1ULL << VIRTIO_NET_F_CSUM);
-        if (err) {
-            VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
-                     "client port: %s\n", dev->up.name);
-            goto unlock;
+        if (!dpdk_multi_segment_mbufs) {
+            err = rte_vhost_driver_disable_features(dev->vhost_id,
+                                        1ULL << VIRTIO_NET_F_HOST_TSO4
+                                        | 1ULL << VIRTIO_NET_F_HOST_TSO6
+                                        | 1ULL << VIRTIO_NET_F_CSUM);
+            if (err) {
+                VLOG_ERR("rte_vhost_driver_disable_features failed for vhost "
+                         "user client port: %s\n", dev->up.name);
+                goto unlock;
+            }
         }
 
         err = rte_vhost_driver_start(dev->vhost_id);
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 048725a..11f269f 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1108,11 +1108,20 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
 
     struct dp_packet *packet;
     DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
+        size_t size = dp_packet_size(packet);
+
+        /* TSO not supported in Dummy netdev */
+        if (dp_packet_is_tso(packet)) {
+            VLOG_WARN("%s: No TSO support on port, TSO packet of size "
+                      "%" PRIuSIZE " dropped", netdev_get_name(netdev), size);
+
+            continue;
+        }
+
         /* We need the whole data to send the packet on the device */
         dp_packet_linearize(packet);
 
         const void *buffer = dp_packet_data(packet);
-        size_t size = dp_packet_size(packet);
 
         if (packet->packet_type != htonl(PT_ETH)) {
             error = EPFNOSUPPORT;
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 6439d7c..9a88fb6 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1316,6 +1316,13 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
 
     struct dp_packet *packet;
     DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        /* TSO not supported in Linux netdev */
+        if (dp_packet_is_tso(packet)) {
+            VLOG_WARN_RL(&rl, "%d: No TSO support on port, TSO packet of size "
+                         "%" PRIuSIZE " dropped", sock, size);
+            continue;
+        }
+
         /* We need the whole data to send the packet on the device */
         dp_packet_linearize(packet);
 
@@ -1372,6 +1379,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
         ssize_t retval;
         int error;
 
+        /* TSO not supported in Linux netdev */
+        if (dp_packet_is_tso(packet)) {
+            VLOG_WARN_RL(&rl, "%s: No TSO support on port, TSO packet of size "
+                         "%" PRIuSIZE " dropped", netdev_get_name(netdev_),
+                         size);
+            continue;
+        }
+
         /* We need the whole data to send the packet on the device */
         dp_packet_linearize(packet);
 
-- 
2.7.4



More information about the dev mailing list