[ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic

Zhenyu Gao sysugaozhenyu at gmail.com
Fri Jun 16 12:53:52 UTC 2017


This patch introduces TX TCP checksum offload support for DPDK physical
NICs (pnic). The feature is disabled by default and can be enabled by
setting the tx-checksum-offload option, for example:
ovs-vsctl set Interface dpdk-eth3 \
 options:tx-checksum-offload=true
---
 lib/netdev-dpdk.c    | 112 +++++++++++++++++++++++++++++++++++++++++++++++----
 vswitchd/vswitch.xml |  13 ++++--
 2 files changed, 115 insertions(+), 10 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index bba4de3..5a68a48 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -32,6 +32,7 @@
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 #include <rte_virtio_net.h>
+#include <rte_ip.h>
 
 #include "dirs.h"
 #include "dp-packet.h"
@@ -328,6 +329,7 @@ struct ingress_policer {
 
 enum dpdk_hw_ol_features {
     NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
+    NETDEV_TX_CHECKSUM_OFFLOAD = 1 << 1,
 };
 
 struct netdev_dpdk {
@@ -649,6 +651,8 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
     int diag = 0;
     int i;
     struct rte_eth_conf conf = port_conf;
+    struct rte_eth_txconf *txconf;
+    struct rte_eth_dev_info dev_info;
 
     if (dev->mtu > ETHER_MTU) {
         conf.rxmode.jumbo_frame = 1;
@@ -676,9 +680,16 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
             break;
         }
 
+        rte_eth_dev_info_get(dev->port_id, &dev_info);
+        txconf = &dev_info.default_txconf;
+        if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
+            /* Enable Tx offload features on the pnic. */
+            txconf->txq_flags = 0;
+        }
+
         for (i = 0; i < n_txq; i++) {
             diag = rte_eth_tx_queue_setup(dev->port_id, i, dev->txq_size,
-                                          dev->socket_id, NULL);
+                                          dev->socket_id, txconf);
             if (diag) {
                 VLOG_INFO("Interface %s txq(%d) setup error: %s",
                           dev->up.name, i, rte_strerror(-diag));
@@ -724,11 +735,15 @@ dpdk_eth_checksum_offload_configure(struct netdev_dpdk *dev)
 {
     struct rte_eth_dev_info info;
     bool rx_csum_ol_flag = false;
+    bool tx_csum_ol_flag = false;
     uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
                                      DEV_RX_OFFLOAD_TCP_CKSUM |
                                      DEV_RX_OFFLOAD_IPV4_CKSUM;
+    uint32_t tx_chksm_offload_capa = DEV_TX_OFFLOAD_TCP_CKSUM;
+
     rte_eth_dev_info_get(dev->port_id, &info);
     rx_csum_ol_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
+    tx_csum_ol_flag = (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) != 0;
 
     if (rx_csum_ol_flag &&
         (info.rx_offload_capa & rx_chksm_offload_capa) !=
@@ -736,9 +751,15 @@ dpdk_eth_checksum_offload_configure(struct netdev_dpdk *dev)
         VLOG_WARN_ONCE("Rx checksum offload is not supported on device %"PRIu8,
                        dev->port_id);
         dev->hw_ol_features &= ~NETDEV_RX_CHECKSUM_OFFLOAD;
-        return;
+    } else if (tx_csum_ol_flag &&
+               (info.tx_offload_capa & tx_chksm_offload_capa) !=
+                tx_chksm_offload_capa) {
+        VLOG_WARN_ONCE("Tx checksum offload is not supported on device %"PRIu8,
+                       dev->port_id);
+        dev->hw_ol_features &= ~NETDEV_TX_CHECKSUM_OFFLOAD;
+    } else {
+        netdev_request_reconfigure(&dev->up);
     }
-    netdev_request_reconfigure(&dev->up);
 }
 
 static void
@@ -1119,6 +1140,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
         } else {
             smap_add(args, "rx_csum_offload", "false");
         }
+        if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
+            smap_add(args, "tx_csum_offload", "true");
+        } else {
+            smap_add(args, "tx_csum_offload", "false");
+        }
     }
     ovs_mutex_unlock(&dev->mutex);
 
@@ -1210,7 +1236,10 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
         {RTE_FC_RX_PAUSE, RTE_FC_FULL    }
     };
     bool rx_chksm_ofld;
-    bool temp_flag;
+    bool tx_chksm_ofld;
+    bool temp_rx_flag;
+    bool temp_tx_flag;
+    bool change = false;
     const char *new_devargs;
     int err = 0;
 
@@ -1295,13 +1324,24 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
     /* Rx checksum offload configuration */
     /* By default the Rx checksum offload is ON */
     rx_chksm_ofld = smap_get_bool(args, "rx-checksum-offload", true);
-    temp_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD)
+    tx_chksm_ofld = smap_get_bool(args, "tx-checksum-offload", false);
+    temp_rx_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD)
                         != 0;
-    if (temp_flag != rx_chksm_ofld) {
+    temp_tx_flag = (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD)
+                        != 0;
+    if (temp_rx_flag != rx_chksm_ofld) {
         dev->hw_ol_features ^= NETDEV_RX_CHECKSUM_OFFLOAD;
-        dpdk_eth_checksum_offload_configure(dev);
+        change = true;
     }
 
+    if (temp_tx_flag != tx_chksm_ofld) {
+        dev->hw_ol_features ^= NETDEV_TX_CHECKSUM_OFFLOAD;
+        change = true;
+    }
+
+    if (change) {
+        dpdk_eth_checksum_offload_configure(dev);
+    }
 out:
     ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
@@ -1415,6 +1455,55 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
     rte_free(rx);
 }
 
+static inline void
+netdev_prepare_tx_csum(struct dp_packet **pkts, int pkt_cnt)
+{
+    int i = 0;
+
+    for (i = 0; i < pkt_cnt; i++) {
+        ovs_be16 dl_type;
+        struct dp_packet *pkt = (struct dp_packet*)pkts[i];
+        const char *data = dp_packet_data(pkt);
+        char *l3hdr = (char*)(data + pkt->l3_ofs);
+
+        if (pkt->l3_ofs == UINT16_MAX) {
+            continue;
+        }
+
+        if (pkt->l4_ofs == UINT16_MAX) {
+            continue;
+        }
+
+
+        dl_type = *(ovs_be16 *)(data + pkt->l3_ofs - 2);
+        if (dl_type == htons(ETH_TYPE_IP)) {
+
+            if (((struct ipv4_hdr*)l3hdr)->next_proto_id == IPPROTO_TCP) {
+                struct tcp_header *tcp_hdr = (struct tcp_header*)(data + pkt->l4_ofs);
+
+                pkt->mbuf.l2_len = pkt->l3_ofs;
+                pkt->mbuf.l3_len = pkt->l4_ofs - pkt->l3_ofs;
+                pkt->mbuf.ol_flags |= PKT_TX_TCP_CKSUM|PKT_TX_IPV4;
+                tcp_hdr->tcp_csum = 0;
+                tcp_hdr->tcp_csum = rte_ipv4_phdr_cksum((struct ipv4_hdr*)l3hdr,
+                                                        pkt->mbuf.ol_flags);
+            }
+        } else if (dl_type == htons(ETH_TYPE_IPV6)) {
+            if (((struct ipv6_hdr*)l3hdr)->proto == IPPROTO_TCP) {
+                struct tcp_header *tcp_hdr = (struct tcp_header*)(data + pkt->l4_ofs);
+
+                pkt->mbuf.l2_len = pkt->l3_ofs;
+                pkt->mbuf.l3_len = pkt->l4_ofs - pkt->l3_ofs;
+                pkt->mbuf.ol_flags |= PKT_TX_TCP_CKSUM|PKT_TX_IPV6;
+                tcp_hdr->tcp_csum = 0;
+                tcp_hdr->tcp_csum = rte_ipv6_phdr_cksum((struct ipv6_hdr*)l3hdr,
+                                                        pkt->mbuf.ol_flags);
+             }
+        }
+
+    }
+}
+
 /* Tries to transmit 'pkts' to txq 'qid' of device 'dev'.  Takes ownership of
  * 'pkts', even in case of failure.
  *
@@ -1803,6 +1892,11 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
         /* We have to do a copy for now */
         memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
                dp_packet_data(batch->packets[i]), size);
+        if (batch->packets[i]->mbuf.ol_flags & PKT_TX_TCP_CKSUM) {
+            pkts[newcnt]->l2_len = batch->packets[i]->mbuf.l2_len;
+            pkts[newcnt]->l3_len = batch->packets[i]->mbuf.l3_len;
+            pkts[newcnt]->ol_flags = batch->packets[i]->mbuf.ol_flags;
+        }
 
         rte_pktmbuf_data_len(pkts[newcnt]) = size;
         rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
@@ -1861,6 +1955,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
+        netdev_prepare_tx_csum(batch->packets, batch->count);
+    }
+
     if (OVS_UNLIKELY(!may_steal ||
                      batch->packets[0]->source != DPBUF_DPDK)) {
         struct netdev *netdev = &dev->up;
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 9bb828f..826cf15 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3415,10 +3415,11 @@
       </column>
     </group>
 
-    <group title="Rx Checksum Offload Configuration">
+    <group title="Rx/TX Checksum Offload Configuration">
       <p>
-        The checksum validation on the incoming packets are performed on NIC
-        using Rx checksum offload feature. Implemented only for <code>dpdk
+        Checksum validation of incoming packets and TCP checksum computation
+        for outgoing packets can be performed on the NIC using the Rx/Tx
+        checksum offload features. Implemented only for <code>dpdk
         </code>physical interfaces.
       </p>
 
@@ -3427,6 +3428,12 @@
         Set to <code>false</code> to disble Rx checksum offloading on <code>
         dpdk</code>physical ports. By default, Rx checksum offload is enabled.
       </column>
+
+      <column name="options" key="tx-checksum-offload"
+              type='{"type": "boolean"}'>
+        Set to <code>true</code> to enable Tx checksum offloading on
+        <code>dpdk</code> physical ports. By default, it is disabled.
+      </column>
     </group>
 
     <group title="Common Columns">
-- 
1.8.3.1



More information about the dev mailing list