[ovs-dev] [PATCH V3 2/4] Add GSO support for DPDK data path

yang_y_yi at 163.com yang_y_yi at 163.com
Fri Aug 7 10:56:46 UTC 2020


From: Yi Yang <yangyi01 at inspur.com>

GSO (Generic Segmentation Offload) segments a large UDP
or TCP packet into smaller packets that fit the MTU of
the destination. In particular, when the physical NIC
can't do VXLAN TSO or VXLAN UFO in hardware, GSO makes
sure userspace TSO still works instead of dropping the
packet.

In addition, GSO helps improve UDP performance when
UFO is enabled in the VM.

GSO supports TCP, UDP, VXLAN TCP, and VXLAN UDP; it is
performed in the Tx function of the physical NIC.

Signed-off-by: Yi Yang <yangyi01 at inspur.com>
---
 lib/dp-packet.h    |  21 +++-
 lib/netdev-dpdk.c  | 358 +++++++++++++++++++++++++++++++++++++++++++++++++----
 lib/netdev-linux.c |  17 ++-
 lib/netdev.c       |  67 +++++++---
 4 files changed, 417 insertions(+), 46 deletions(-)
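
Editor's note (not part of the patch): the commit message leans on the
DPDK GSO library, so here is a minimal sketch of that flow for
reviewers.  It uses only the rte_gso_ctx fields and the
rte_gso_segment() call that appear in the diff below; gso_xmit_sketch,
'pool', 'segsz' and OUT_SEGS are placeholder names, and DPDK 19.11-era
semantics (the library consumes the input mbuf when it segments) are
assumed.

    #include <rte_ethdev.h>
    #include <rte_gso.h>
    #include <rte_mbuf.h>

    #define OUT_SEGS 64    /* same role as MAX_GSO_MBUFS in the patch */

    static int
    gso_xmit_sketch(struct rte_mbuf *pkt, struct rte_mempool *pool,
                    uint16_t segsz)
    {
        struct rte_mbuf *segs[OUT_SEGS];
        struct rte_gso_ctx ctx = {
            .direct_pool = pool,     /* mbufs for the segment headers */
            .indirect_pool = pool,   /* indirect mbufs for the payload */
            .gso_types = DEV_TX_OFFLOAD_TCP_TSO, /* or UDP/VXLAN types */
            .gso_size = segsz,       /* max output segment size, headers
                                      * included */
            .flag = 0,
        };
        int n = rte_gso_segment(pkt, &ctx, segs, OUT_SEGS);

        if (n < 0) {
            /* Segmentation failed; the caller still owns 'pkt'. */
            rte_pktmbuf_free(pkt);
            return n;
        }
        /* 'n' segments now sit in segs[]: fix up their IP/L4 checksums
         * in software and hand them to rte_eth_tx_burst(), as the patch
         * does in netdev_dpdk_eth_tx_burst(). */
        return n;
    }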

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 79895f2..c33868d 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -83,6 +83,8 @@ enum dp_packet_offload_mask {
     DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, PKT_TX_SCTP_CKSUM, 0x800),
     /* VXLAN TCP Segmentation Offload. */
     DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_VXLAN, 0x1000),
+    /* UDP Segmentation Offload. */
+    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_SEG, PKT_TX_UDP_SEG, 0x2000),
     /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
 };
 
@@ -97,7 +99,8 @@ enum dp_packet_offload_mask {
                                      DP_PACKET_OL_TX_IPV6          | \
                                      DP_PACKET_OL_TX_TCP_CKSUM     | \
                                      DP_PACKET_OL_TX_UDP_CKSUM     | \
-                                     DP_PACKET_OL_TX_SCTP_CKSUM)
+                                     DP_PACKET_OL_TX_SCTP_CKSUM    | \
+                                     DP_PACKET_OL_TX_UDP_SEG)
 
 #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \
                                  DP_PACKET_OL_TX_UDP_CKSUM | \
@@ -956,6 +959,13 @@ dp_packet_hwol_is_tso(const struct dp_packet *b)
     return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TCP_SEG);
 }
 
+/* Returns 'true' if packet 'b' is marked for UDP segmentation offloading. */
+static inline bool
+dp_packet_hwol_is_uso(const struct dp_packet *b)
+{
+    return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_UDP_SEG);
+}
+
 /* Returns 'true' if packet 'b' is marked for IPv4 checksum offloading. */
 static inline bool
 dp_packet_hwol_is_ipv4(const struct dp_packet *b)
@@ -1034,6 +1044,15 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
     *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;
 }
 
+/* Mark packet 'b' for UDP segmentation offloading.  It implies that
+ * the packet 'b' is also marked for IPv4 or IPv6 checksum offloading
+ * and for UDP checksum offloading. */
+static inline void
+dp_packet_hwol_set_udp_seg(struct dp_packet *b)
+{
+    *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_UDP_SEG;
+}
+
 #ifdef DPDK_NETDEV
 /* Mark packet 'b' for VXLAN TCP segmentation offloading. */
 static inline void
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 30493ed..888a45e 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -38,13 +38,15 @@
 #include <rte_errno.h>
 #include <rte_ethdev.h>
 #include <rte_flow.h>
+#include <rte_gso.h>
+#include <rte_ip.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 #include <rte_pci.h>
 #include <rte_version.h>
 #include <rte_vhost.h>
-#include <rte_ip.h>
+#include <rte_ip_frag.h>
 
 #include "cmap.h"
 #include "coverage.h"
@@ -162,6 +164,7 @@ typedef uint16_t dpdk_port_t;
                                    | DEV_TX_OFFLOAD_UDP_CKSUM    \
                                    | DEV_TX_OFFLOAD_IPV4_CKSUM)
 
+#define MAX_GSO_MBUFS 64
 
 static const struct rte_eth_conf port_conf = {
     .rxmode = {
@@ -2171,6 +2174,16 @@ is_local_to_local(uint16_t src_port_id, struct netdev_dpdk *dev)
     return ret;
 }
 
+static uint16_t
+get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
+{
+    if (ethertype == htons(RTE_ETHER_TYPE_IPV4)) {
+        return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
+    } else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
+        return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
+    }
+}
+
 /* Prepare the packet for HWOL.
  * Return True if the packet is OK to continue. */
 static bool
@@ -2203,10 +2216,9 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
          * also can't be handled, so PKT_TX_TUNNEL_VXLAN must be cleared
          * and outer_l2_len and outer_l3_len must be zeroed.
          */
-        if (!(dev->up.ol_flags & NETDEV_TX_OFFLOAD_VXLAN_TSO)
-            || (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
+        if (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
             && (mbuf->pkt_len <= 1450 + mbuf->outer_l2_len + mbuf->outer_l3_len
-                + mbuf->l2_len)))  {
+                + mbuf->l2_len))  {
             mbuf->ol_flags &= ~PKT_TX_TUNNEL_VXLAN;
             mbuf->l2_len -= sizeof(struct udp_header)
                         + sizeof(struct vxlanhdr);
@@ -2249,7 +2261,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
             /* inner IP checksum offload */
             mbuf->ol_flags |= PKT_TX_IP_CKSUM;
         }
-    } else if (mbuf->ol_flags & PKT_TX_L4_MASK) {
+    } else if (mbuf->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) {
         /* Handle VLAN TSO */
             /* no inner IP checksum for IPV6 */
         if (mbuf->ol_flags & PKT_TX_IPV4) {
@@ -2273,6 +2285,18 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
         mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
         mbuf->outer_l2_len = 0;
         mbuf->outer_l3_len = 0;
+
+        /* In the GRO case, PKT_TX_TCP_SEG or PKT_TX_UDP_SEG is not set by
+         * the GRO APIs, so this is the place to mark it.
+         */
+        if ((mbuf->pkt_len > 1464)
+            && (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)))) {
+            if (l4_proto == IPPROTO_UDP) {
+                mbuf->ol_flags |= PKT_TX_UDP_SEG;
+            } else if (l4_proto == IPPROTO_TCP) {
+                mbuf->ol_flags |= PKT_TX_TCP_SEG;
+            }
+        }
     }
 
     /* It is possible that l4_len isn't set for vhostuserclient */
@@ -2284,6 +2308,10 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
         mbuf->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
     }
 
+    if ((l4_proto != IPPROTO_UDP) && (l4_proto != IPPROTO_TCP)) {
+        return true;
+    }
+
     if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
         if (l4_proto != IPPROTO_UDP) {
             VLOG_WARN_RL(&rl, "%s: UDP packet without L4 header"
@@ -2294,11 +2322,13 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
                mbuf->ol_flags & PKT_TX_TCP_CKSUM) {
         if (l4_proto != IPPROTO_TCP) {
             VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
-                         " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len);
+                         " pkt len: %"PRIu32" l4_proto = %d",
+                         dev->up.name, mbuf->pkt_len, l4_proto);
             return false;
         }
 
-        if (mbuf->pkt_len - mbuf->l2_len > 1450) {
+        if (mbuf->pkt_len > 1450 + mbuf->outer_l2_len + mbuf->outer_l3_len
+            + mbuf->l2_len) {
             dp_packet_hwol_set_tcp_seg(pkt);
         }
 
@@ -2308,7 +2338,66 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
         } else {
             mbuf->tso_segsz = 0;
         }
+
+        if (!(dev->up.ol_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
+            /* PKT_TX_TCP_CKSUM must be cleared for GSO because the
+             * TCP checksum can only be calculated in software in the
+             * GSO case.
+             */
+            mbuf->ol_flags &= ~PKT_TX_TCP_CKSUM;
+        }
     }
+
+    /* UDP GSO if necessary */
+    if (l4_proto == IPPROTO_UDP) {
+        /* VXLAN GSO can be done here */
+        if ((mbuf->ol_flags & PKT_TX_UDP_SEG) ||
+            (mbuf->pkt_len > (1450 + mbuf->outer_l2_len + mbuf->outer_l3_len +
+                             mbuf->l2_len))) {
+            dp_packet_hwol_set_udp_seg(pkt);
+
+            /* For UDP GSO, the UDP checksum must be calculated in software */
+            if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
+                void *l3_hdr, *l4_hdr;
+                struct rte_udp_hdr *udp_hdr;
+
+                /* PKT_TX_UDP_CKSUM must be cleared for GSO because
+                 * the UDP checksum can only be calculated in software
+                 * in the GSO case.
+                 */
+                mbuf->ol_flags &= ~PKT_TX_UDP_CKSUM;
+
+                eth_hdr = (struct rte_ether_hdr *)
+                    ((uint8_t *)eth_hdr + mbuf->outer_l2_len +
+                               mbuf->outer_l3_len +
+                               sizeof(struct udp_header) +
+                               sizeof(struct vxlanhdr));
+                l3_hdr = (uint8_t *)eth_hdr + mbuf->l2_len -
+                         sizeof(struct udp_header) -
+                         sizeof(struct vxlanhdr);
+                l4_hdr = (uint8_t *)l3_hdr + mbuf->l3_len;
+                ip_hdr = (struct rte_ipv4_hdr *)l3_hdr;
+                ip_hdr->hdr_checksum = 0;
+                ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
+                /* Don't touch the UDP checksum if this is an IP fragment */
+                if (!rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) {
+                    udp_hdr = (struct rte_udp_hdr *)l4_hdr;
+                    udp_hdr->dgram_cksum = 0;
+                    udp_hdr->dgram_cksum =
+                        get_udptcp_checksum(l3_hdr, l4_hdr,
+                                            eth_hdr->ether_type);
+                }
+            }
+
+            /* For GSO, gso_size includes the header lengths. */
+            mbuf->tso_segsz = 1450 + mbuf->outer_l2_len + mbuf->outer_l3_len +
+                              mbuf->l2_len;
+            if (mbuf->tso_segsz > dev->mtu) {
+                mbuf->tso_segsz = dev->mtu;
+            }
+        }
+    }
+
     return true;
 }
 
@@ -2339,24 +2428,19 @@ netdev_dpdk_prep_hwol_batch(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
     return cnt;
 }
 
-/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'.  Takes ownership of
- * 'pkts', even in case of failure.
- *
- * Returns the number of packets that weren't transmitted. */
 static inline int
-netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
-                         struct rte_mbuf **pkts, int cnt)
+__netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
+                           struct rte_mbuf **pkts, int cnt)
 {
     uint32_t nb_tx = 0;
-    uint16_t nb_tx_prep = cnt;
+    uint32_t nb_tx_prep;
 
-    if (userspace_tso_enabled()) {
-        nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
-        if (nb_tx_prep != cnt) {
-            VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
-                         "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
-                         cnt, rte_strerror(rte_errno));
-        }
+    nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
+    if (nb_tx_prep != cnt) {
+        VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
+                          "Only %u/%u are valid: %s",
+                     dev->up.name, nb_tx_prep,
+                     cnt, rte_strerror(rte_errno));
     }
 
     while (nb_tx != nb_tx_prep) {
@@ -2384,6 +2468,200 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
     return cnt - nb_tx;
 }
 
+static inline void
+set_multiseg_udptcp_cksum(struct rte_mbuf *mbuf)
+{
+    uint16_t l3_offset = mbuf->outer_l2_len + mbuf->outer_l3_len
+                         + mbuf->l2_len;
+    struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
+        (rte_pktmbuf_mtod(mbuf, char *) + l3_offset);
+    struct rte_tcp_hdr *tcp_hdr;
+    uint32_t l4_hdr_len;
+    uint8_t *l4_hdr;
+    struct rte_mbuf *next = mbuf->next;
+    uint32_t cksum = 0;
+    uint16_t l4_proto;
+    uint32_t inner_cksum;
+
+    l4_proto = ipv4_hdr->next_proto_id;
+    if ((l4_proto != IPPROTO_UDP) && (l4_proto != IPPROTO_TCP)) {
+        return;
+    }
+
+    if (l4_proto == IPPROTO_TCP) {
+        /* For TCP GSO, the inner TCP header is in every segment, so
+         * the TCP checksum has to be calculated in software.
+         */
+
+        l4_hdr_len = mbuf->data_len - l3_offset
+                     - sizeof(struct rte_ipv4_hdr);
+        l4_hdr = (uint8_t *)(ipv4_hdr + 1);
+        tcp_hdr = (struct rte_tcp_hdr *)l4_hdr;
+        tcp_hdr->cksum = 0;
+    }
+
+    /* Set inner ip checksum */
+    ipv4_hdr->hdr_checksum = 0;
+    ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
+
+    if (l4_proto == IPPROTO_TCP) {
+        cksum = rte_raw_cksum(l4_hdr, l4_hdr_len);
+    } else if (l4_proto == IPPROTO_UDP) {
+        if (next == NULL) {
+            /* It wasn't GSOed */
+            cksum = rte_raw_cksum(ipv4_hdr + 1,
+                                  ntohs(ipv4_hdr->total_length)
+                                      - sizeof(struct rte_ipv4_hdr));
+        } else {
+            cksum = 0;
+        }
+    }
+
+    /* It was GSOed */
+    while (next) {
+        cksum += rte_raw_cksum(rte_pktmbuf_mtod(next, char *), next->data_len);
+        next = next->next;
+    }
+
+    /* Save cksum to inner_cksum; the outer UDP checksum needs it. */
+    inner_cksum = cksum;
+
+    cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
+    cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+    cksum = (~cksum) & 0xffff;
+    if (cksum == 0) {
+        cksum = 0xffff;
+    }
+
+    /* Set inner TCP checksum */
+    if (l4_proto == IPPROTO_TCP) {
+        tcp_hdr->cksum = (uint16_t)cksum;
+    }
+
+    /* Set outer udp checksum in case of VXLAN */
+    if (mbuf->outer_l2_len != 0) {
+        ipv4_hdr = (struct rte_ipv4_hdr *)
+            (rte_pktmbuf_mtod(mbuf, char *) + mbuf->outer_l2_len);
+        struct rte_udp_hdr *udp_hdr = (struct rte_udp_hdr *)
+            (ipv4_hdr + 1);
+
+        /* Set outer ip checksum */
+        ipv4_hdr->hdr_checksum = 0;
+        ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
+
+        udp_hdr->dgram_cksum = 0;
+        cksum = rte_ipv4_phdr_cksum(ipv4_hdr, 0);
+        cksum += rte_raw_cksum(udp_hdr, mbuf->l2_len + mbuf->l3_len);
+        cksum += inner_cksum;
+        if (l4_proto == IPPROTO_TCP) {
+            cksum += tcp_hdr->cksum;
+        }
+        cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+        cksum = (~cksum) & 0xffff;
+        if (cksum == 0) {
+            cksum = 0xffff;
+        }
+        udp_hdr->dgram_cksum = (uint16_t)cksum;
+    }
+}
+
+/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'.  Takes ownership of
+ * 'pkts', even in case of failure.
+ *
+ * Returns the number of packets that weren't transmitted. */
+static inline int
+netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
+                         struct rte_mbuf **pkts, int cnt)
+{
+    uint32_t nb_tx = 0;
+    int i;
+    int ret;
+    int failures = 0;
+
+    if (userspace_tso_enabled()) {
+        /* This is the best point to do GSO. */
+        struct rte_gso_ctx gso_ctx;
+        struct rte_mbuf *gso_mbufs[MAX_GSO_MBUFS];
+        int tx_start = -1;
+
+        /* Setup gso context */
+        gso_ctx.direct_pool = dev->dpdk_mp->mp;
+        gso_ctx.indirect_pool = dev->dpdk_mp->mp;
+
+        /* Do GSO if needed */
+        for (i = 0; i < cnt; i++) {
+            if (((pkts[i]->ol_flags & PKT_TX_UDP_SEG) &&
+                  !(dev->hw_ol_features & DEV_TX_OFFLOAD_UDP_TSO)) ||
+                ((pkts[i]->ol_flags & PKT_TX_TCP_SEG) &&
+                  ((!(dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD)
+                   && (pkts[i]->ol_flags & PKT_TX_TUNNEL_VXLAN))
+                   || !(dev->hw_ol_features & DEV_TX_OFFLOAD_TCP_TSO)))) {
+                /* Send the non-GSO packets that precede pkts[i]. */
+                if (tx_start != -1) {
+                    failures += __netdev_dpdk_eth_tx_burst(
+                                    dev, qid,
+                                    pkts + tx_start,
+                                    i - tx_start);
+                }
+                tx_start = -1;
+
+                gso_ctx.gso_types = 0;
+                gso_ctx.gso_size = pkts[i]->tso_segsz;
+                gso_ctx.flag = 0;
+                if (pkts[i]->ol_flags & PKT_TX_TUNNEL_VXLAN) {
+                    gso_ctx.gso_types |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
+                }
+                if (pkts[i]->ol_flags & PKT_TX_UDP_SEG) {
+                    gso_ctx.gso_types |= DEV_TX_OFFLOAD_UDP_TSO;
+                } else if (pkts[i]->ol_flags & PKT_TX_TCP_SEG) {
+                    gso_ctx.gso_types |= DEV_TX_OFFLOAD_TCP_TSO;
+                    pkts[i]->ol_flags &= ~PKT_TX_TCP_CKSUM;
+                }
+                ret = rte_gso_segment(pkts[i], /* packet to segment */
+                                      &gso_ctx, /* gso context */
+                                      /* gso output mbufs */
+                                      (struct rte_mbuf **)&gso_mbufs,
+                                      MAX_GSO_MBUFS);
+                if (ret < 0) {
+                    rte_pktmbuf_free(pkts[i]);
+                } else {
+                    int j, k;
+                    struct rte_mbuf * next_part;
+                    nb_tx = ret;
+                    for (j = 0; j < nb_tx; j++) {
+                        set_multiseg_udptcp_cksum(gso_mbufs[j]);
+                        /* Clear offload metadata; the segments need no
+                         * offload. */
+                        gso_mbufs[j]->ol_flags = 0;
+                        gso_mbufs[j]->outer_l2_len = 0;
+                        gso_mbufs[j]->outer_l3_len = 0;
+                        gso_mbufs[j]->l2_len = 0;
+                        gso_mbufs[j]->l3_len = 0;
+                        gso_mbufs[j]->l4_len = 0;
+                        next_part = gso_mbufs[j];
+                        for (k = 0; k < gso_mbufs[j]->nb_segs; k++) {
+                            next_part = next_part->next;
+                        }
+                    }
+                    __netdev_dpdk_eth_tx_burst(dev, qid, gso_mbufs, nb_tx);
+                }
+                continue;
+            }
+            if (tx_start == -1) {
+                tx_start = i;
+            }
+        }
+
+        if (tx_start != -1) {
+            /* Send the remaining non-GSO packets. */
+            failures += __netdev_dpdk_eth_tx_burst(dev, qid, pkts + tx_start,
+                                                   i - tx_start);
+        }
+        return failures;
+    }
+
+    return __netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
+}
+
 static inline bool
 netdev_dpdk_srtcm_policer_pkt_handle(struct rte_meter_srtcm *meter,
                                      struct rte_meter_srtcm_profile *profile,
@@ -2786,10 +3064,24 @@ out:
     }
 }
 
+struct shinfo_arg {
+    void * buf;
+    struct rte_mbuf *mbuf;
+};
+
+/* In the GSO case, the external buffer can only be freed by
+ * netdev_dpdk_extbuf_free().
+ */
 static void
-netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
+netdev_dpdk_extbuf_free(struct rte_mbuf *m, void *opaque)
 {
-    rte_free(opaque);
+    struct shinfo_arg *arg = (struct shinfo_arg *)opaque;
+
+    rte_free(arg->buf);
+    if (m != arg->mbuf) {
+        rte_pktmbuf_free(arg->mbuf);
+    }
+    free(arg);
 }
 
 static struct rte_mbuf *
@@ -2821,8 +3113,11 @@ dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len)
 
     /* Initialize shinfo. */
     if (shinfo) {
+        struct shinfo_arg *arg = xmalloc(sizeof(struct shinfo_arg));
+        arg->buf = buf;
+        arg->mbuf = pkt;
         shinfo->free_cb = netdev_dpdk_extbuf_free;
-        shinfo->fcb_opaque = buf;
+        shinfo->fcb_opaque = arg;
         rte_mbuf_ext_refcnt_set(shinfo, 1);
     } else {
         shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
@@ -2852,6 +3147,10 @@ dpdk_pktmbuf_alloc(struct rte_mempool *mp, uint32_t data_len)
         return NULL;
     }
 
+    if (unlikely(pkt->shinfo != NULL)) {
+        pkt->shinfo = NULL;
+    }
+
     if (rte_pktmbuf_tailroom(pkt) >= data_len) {
         return pkt;
     }
@@ -5192,6 +5491,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
     int err;
     uint64_t vhost_flags = 0;
     uint64_t vhost_unsup_flags;
+    uint64_t vhost_supported_flags;
     bool zc_enabled;
 
     ovs_mutex_lock(&dev->mutex);
@@ -5277,6 +5577,16 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
             goto unlock;
         }
 
+        err = rte_vhost_driver_get_features(dev->vhost_id,
+                                            &vhost_supported_flags);
+        if (err) {
+            VLOG_ERR("rte_vhost_driver_get_features failed for "
+                     "vhost user client port: %s\n", dev->up.name);
+            goto unlock;
+        }
+        VLOG_INFO("vhostuserclient port %s features: 0x%016lx",
+                  dev->up.name, vhost_supported_flags);
+
         err = rte_vhost_driver_start(dev->vhost_id);
         if (err) {
             VLOG_ERR("rte_vhost_driver_start failed for vhost user "
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 9f830b4..557f139 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -6566,6 +6566,16 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
 
         l4_len = TCP_OFFSET(tcp_hdr->tcp_ctl) * 4;
         dp_packet_hwol_set_l4_len(b, l4_len);
+    } else if (*l4proto == IPPROTO_UDP) {
+        struct udp_header *udp_hdr = dp_packet_at(b, l2_len + l3_len,
+                                          sizeof(struct udp_header));
+
+        if (!udp_hdr) {
+            return -EINVAL;
+        }
+
+        l4_len = sizeof(struct udp_header);
+        dp_packet_hwol_set_l4_len(b, l4_len);
     }
 
     return 0;
@@ -6581,10 +6591,6 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
         return -EINVAL;
     }
 
-    if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) {
-        return 0;
-    }
-
     if (netdev_linux_parse_l2(b, &l4proto)) {
         return -EINVAL;
     }
@@ -6609,6 +6615,9 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
             || type == VIRTIO_NET_HDR_GSO_TCPV6) {
             dp_packet_hwol_set_tcp_seg(b);
         }
+        if (type == VIRTIO_NET_HDR_GSO_UDP) {
+            dp_packet_hwol_set_udp_seg(b);
+        }
     }
 
     return 0;
diff --git a/lib/netdev.c b/lib/netdev.c
index 64583d1..02f28c8 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -825,23 +825,41 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
 {
     uint64_t l4_mask;
 
-    if (dp_packet_hwol_is_vxlan_tcp_seg(packet)
-        && (dp_packet_hwol_is_tso(packet) || dp_packet_hwol_l4_mask(packet))
-        && !(netdev_flags & NETDEV_TX_OFFLOAD_VXLAN_TSO)) {
-        /* Fall back to GSO in software. */
-        VLOG_ERR_BUF(errormsg, "No VXLAN TSO support");
-        return false;
-    }
-
-    if (dp_packet_hwol_is_tso(packet)
-        && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
-            /* Fall back to GSO in software. */
-            VLOG_ERR_BUF(errormsg, "No TSO support");
-            return false;
-    }
-
+    /* GSO can handle TSO in software even if the device has no hardware
+     * offload support, so there is no need to check for it here.
+     */
     l4_mask = dp_packet_hwol_l4_mask(packet);
     if (l4_mask) {
+        /* Calculate the checksum for the VLAN TSO case when no hardware
+         * offload feature is available. Note: in the VXLAN TSO case the
+         * checksum has already been calculated, and the checksum flags in
+         * packet->mbuf.ol_flags have been cleared, so it is not done again
+         * here. */
+        if (dp_packet_hwol_l4_is_tcp(packet)
+            && !dp_packet_hwol_is_vxlan_tcp_seg(packet)
+            && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
+            packet->mbuf.ol_flags &= ~DP_PACKET_OL_TX_TCP_CKSUM;
+            /* Only calculate the TCP checksum for non-TSO packets;
+             * for TSO packets it is calculated after GSO.
+             */
+            if (!(packet->mbuf.ol_flags & DP_PACKET_OL_TX_TCP_SEG)) {
+                calculate_tcpudp_checksum(packet);
+            }
+            return true;
+        } else if (dp_packet_hwol_l4_is_udp(packet)
+            && !dp_packet_hwol_is_vxlan_tcp_seg(packet)
+            && !(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
+            packet->mbuf.ol_flags &= ~DP_PACKET_OL_TX_UDP_CKSUM;
+            /* Only calculate the UDP checksum for non-UFO packets;
+             * for UFO packets it is calculated immediately before
+             * GSO.
+             */
+            if (!(packet->mbuf.ol_flags & DP_PACKET_OL_TX_UDP_SEG)) {
+                calculate_tcpudp_checksum(packet);
+            }
+            return true;
+        }
+
         if (dp_packet_hwol_l4_is_tcp(packet)) {
             if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
                 /* Fall back to TCP csum in software. */
@@ -1013,11 +1031,26 @@ netdev_push_header(const struct netdev *netdev,
                 /* VXLAN offload can't support udp checksum offload
                  * for inner udp packet, so udp checksum must be set
                  * before push header in order that outer checksum can
-                 * be set correctly.
+                 * be set correctly.  The GSO code, however, sets the UDP
+                 * checksum itself if DP_PACKET_OL_TX_UDP_SEG is set.
                  */
                 if (dp_packet_hwol_l4_is_udp(packet)) {
                     packet->mbuf.ol_flags &= ~DP_PACKET_OL_TX_UDP_CKSUM;
-                    calculate_tcpudp_checksum(packet);
+                    /* Only calculate the UDP checksum for non-UFO
+                     * packets; for UFO packets it is calculated
+                     * immediately before GSO.
+                     */
+                    if (!(packet->mbuf.ol_flags & DP_PACKET_OL_TX_UDP_SEG)) {
+                        calculate_tcpudp_checksum(packet);
+                    }
+                } else if (dp_packet_hwol_l4_is_tcp(packet)) {
+                    packet->mbuf.ol_flags &= ~DP_PACKET_OL_TX_TCP_CKSUM;
+                    /* Only calculate the TCP checksum for non-TSO packets;
+                     * for TSO packets it is calculated after GSO.
+                     */
+                    if (!(packet->mbuf.ol_flags & DP_PACKET_OL_TX_TCP_SEG)) {
+                        calculate_tcpudp_checksum(packet);
+                    }
                 }
             }
             netdev->netdev_class->push_header(netdev, packet, data);
-- 
2.7.4
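
Editor's note (not part of the patch): set_multiseg_udptcp_cksum() above
computes the L4 checksum in software across the chained mbuf that
rte_gso_segment() produces.  A minimal sketch of that technique follows;
chain_l4_cksum() is a hypothetical name, and, like the patch, it assumes
every segment except the last has an even length, which blockwise
one's-complement sums require.

    #include <rte_ip.h>
    #include <rte_mbuf.h>

    /* Sum the L4 bytes across all segments, add the IPv4 pseudo-header,
     * then fold the carries and complement. */
    static uint16_t
    chain_l4_cksum(struct rte_mbuf *m, const struct rte_ipv4_hdr *ip,
                   const void *l4, size_t l4_len_in_first_seg)
    {
        uint32_t sum = rte_raw_cksum(l4, l4_len_in_first_seg);
        struct rte_mbuf *seg;

        for (seg = m->next; seg != NULL; seg = seg->next) {
            sum += rte_raw_cksum(rte_pktmbuf_mtod(seg, void *),
                                 seg->data_len);
        }
        sum += rte_ipv4_phdr_cksum(ip, 0);   /* pseudo-header */

        while (sum >> 16) {                  /* fold carries into 16 bits */
            sum = (sum >> 16) + (sum & 0xffff);
        }
        sum = ~sum & 0xffff;
        return sum == 0 ? 0xffff : (uint16_t)sum;  /* 0 is "no checksum" */
    }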

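
Editor's note (not part of the patch): in the lib/netdev-linux.c hunk the
virtio-net header parsing is extended so that a guest's UFO request
reaches the datapath.  The mapping reduces to the hypothetical helper
below; 'b' and 'vnet' play the roles they have in
netdev_linux_parse_vnet_hdr().

    #include <linux/virtio_net.h>

    /* Map the guest's requested GSO type onto OVS offload flags. */
    static void
    mark_segmentation_offload(struct dp_packet *b,
                              const struct virtio_net_hdr *vnet)
    {
        int type = vnet->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

        if (type == VIRTIO_NET_HDR_GSO_TCPV4
            || type == VIRTIO_NET_HDR_GSO_TCPV6) {
            dp_packet_hwol_set_tcp_seg(b);    /* guest asked for TSO */
        } else if (type == VIRTIO_NET_HDR_GSO_UDP) {
            dp_packet_hwol_set_udp_seg(b);    /* UFO, added by this patch */
        }
    }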

