[ovs-dev] [RFC PATCH v1] net-dpdk: Introducing TX tcp HW checksum offload support for DPDK pnic
Zhenyu Gao
sysugaozhenyu at gmail.com
Fri Jun 16 12:53:52 UTC 2017
This patch introduces TX TCP checksum offload support for DPDK physical NICs.
The feature is disabled by default and can be enabled by setting
tx-checksum-offload, for example:
ovs-vsctl set Interface dpdk-eth3 \
options:tx-checksum-offload=true
---
lib/netdev-dpdk.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++----
vswitchd/vswitch.xml | 13 ++++--
2 files changed, 115 insertions(+), 10 deletions(-)
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index bba4de3..5a68a48 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -32,6 +32,7 @@
#include <rte_mbuf.h>
#include <rte_meter.h>
#include <rte_virtio_net.h>
+#include <rte_ip.h>
#include "dirs.h"
#include "dp-packet.h"
@@ -328,6 +329,7 @@ struct ingress_policer {
enum dpdk_hw_ol_features {
NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
+ NETDEV_TX_CHECKSUM_OFFLOAD = 1 << 1,
};
struct netdev_dpdk {
@@ -649,6 +651,8 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
int diag = 0;
int i;
struct rte_eth_conf conf = port_conf;
+ struct rte_eth_txconf *txconf;
+ struct rte_eth_dev_info dev_info;
if (dev->mtu > ETHER_MTU) {
conf.rxmode.jumbo_frame = 1;
@@ -676,9 +680,16 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
break;
}
+ rte_eth_dev_info_get(dev->port_id, &dev_info);
+ txconf = &dev_info.default_txconf;
+ if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
+ /*Enable tx offload feature on pnic*/
+ txconf->txq_flags = 0;
+ }
+
for (i = 0; i < n_txq; i++) {
diag = rte_eth_tx_queue_setup(dev->port_id, i, dev->txq_size,
- dev->socket_id, NULL);
+ dev->socket_id, txconf);
if (diag) {
VLOG_INFO("Interface %s txq(%d) setup error: %s",
dev->up.name, i, rte_strerror(-diag));
@@ -724,11 +735,15 @@ dpdk_eth_checksum_offload_configure(struct netdev_dpdk *dev)
{
struct rte_eth_dev_info info;
bool rx_csum_ol_flag = false;
+ bool tx_csum_ol_flag = false;
uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM |
DEV_RX_OFFLOAD_IPV4_CKSUM;
+ uint32_t tx_chksm_offload_capa = DEV_TX_OFFLOAD_TCP_CKSUM;
+
rte_eth_dev_info_get(dev->port_id, &info);
rx_csum_ol_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
+ tx_csum_ol_flag = (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) != 0;
if (rx_csum_ol_flag &&
(info.rx_offload_capa & rx_chksm_offload_capa) !=
@@ -736,9 +751,15 @@ dpdk_eth_checksum_offload_configure(struct netdev_dpdk *dev)
VLOG_WARN_ONCE("Rx checksum offload is not supported on device %"PRIu8,
dev->port_id);
dev->hw_ol_features &= ~NETDEV_RX_CHECKSUM_OFFLOAD;
- return;
+ } else if (tx_csum_ol_flag &&
+ (info.tx_offload_capa & tx_chksm_offload_capa) !=
+ tx_chksm_offload_capa) {
+ VLOG_WARN_ONCE("Tx checksum offload is not supported on device %"PRIu8,
+ dev->port_id);
+ dev->hw_ol_features &= ~NETDEV_TX_CHECKSUM_OFFLOAD;
+ } else {
+ netdev_request_reconfigure(&dev->up);
}
- netdev_request_reconfigure(&dev->up);
}
static void
@@ -1119,6 +1140,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
} else {
smap_add(args, "rx_csum_offload", "false");
}
+ if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
+ smap_add(args, "tx_csum_offload", "true");
+ } else {
+ smap_add(args, "tx_csum_offload", "false");
+ }
}
ovs_mutex_unlock(&dev->mutex);
@@ -1210,7 +1236,10 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
{RTE_FC_RX_PAUSE, RTE_FC_FULL }
};
bool rx_chksm_ofld;
- bool temp_flag;
+ bool tx_chksm_ofld;
+ bool temp_rx_flag;
+ bool temp_tx_flag;
+ bool change = false;
const char *new_devargs;
int err = 0;
@@ -1295,13 +1324,24 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
/* Rx checksum offload configuration */
/* By default the Rx checksum offload is ON */
rx_chksm_ofld = smap_get_bool(args, "rx-checksum-offload", true);
- temp_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD)
+ tx_chksm_ofld = smap_get_bool(args, "tx-checksum-offload", false);
+ temp_rx_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD)
!= 0;
- if (temp_flag != rx_chksm_ofld) {
+ temp_tx_flag = (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD)
+ != 0;
+ if (temp_rx_flag != rx_chksm_ofld) {
dev->hw_ol_features ^= NETDEV_RX_CHECKSUM_OFFLOAD;
- dpdk_eth_checksum_offload_configure(dev);
+ change = true;
}
+ if (temp_tx_flag != tx_chksm_ofld) {
+ dev->hw_ol_features ^= NETDEV_TX_CHECKSUM_OFFLOAD;
+ change = true;
+ }
+
+ if (change) {
+ dpdk_eth_checksum_offload_configure(dev);
+ }
out:
ovs_mutex_unlock(&dev->mutex);
ovs_mutex_unlock(&dpdk_mutex);
@@ -1415,6 +1455,55 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
rte_free(rx);
}
+/* Prepares the 'pkt_cnt' packets in 'pkts' for TCP checksum offload on the
+ * NIC.
+ *
+ * For each unfragmented IPv4 packet, or IPv6 packet, whose L3 protocol field
+ * is TCP, this stores the L2 and L3 header lengths in the mbuf, sets
+ * PKT_TX_TCP_CKSUM plus the matching IP version flag, and writes the
+ * pseudo-header checksum into the TCP checksum field.
+ *
+ * Packets without parsed L3/L4 offsets, packets too short to hold a TCP
+ * header, IPv4 fragments and non-TCP packets are left untouched. */
+static inline void
+netdev_prepare_tx_csum(struct dp_packet **pkts, int pkt_cnt)
+{
+    /* Any of these bits set in the IPv4 fragment field means "fragment". */
+    const uint16_t frag_bits = IPV4_HDR_OFFSET_MASK | IPV4_HDR_MF_FLAG;
+    int i;
+
+    for (i = 0; i < pkt_cnt; i++) {
+        struct dp_packet *pkt = pkts[i];
+        struct tcp_header *tcp;
+        ovs_be16 dl_type;
+        char *data;
+        char *l3hdr;
+        bool is_v4;
+
+        /* Validate the offsets before deriving any pointer from them:
+         * UINT16_MAX marks an offset that was never set, the EtherType must
+         * fit in front of L3, and the TCP header must fit in the packet. */
+        if (pkt->l3_ofs == UINT16_MAX || pkt->l4_ofs == UINT16_MAX
+            || pkt->l3_ofs < sizeof dl_type
+            || pkt->l4_ofs < pkt->l3_ofs
+            || pkt->l4_ofs + sizeof *tcp > dp_packet_size(pkt)) {
+            continue;
+        }
+
+        data = dp_packet_data(pkt);
+        l3hdr = data + pkt->l3_ofs;
+        /* The EtherType is the two bytes immediately before the L3 header. */
+        dl_type = *(ovs_be16 *) (l3hdr - sizeof dl_type);
+        is_v4 = dl_type == htons(ETH_TYPE_IP);
+
+        if (is_v4) {
+            const struct ipv4_hdr *ip4 = (const struct ipv4_hdr *) l3hdr;
+
+            /* Skip fragments: a fragment never holds the whole TCP segment,
+             * so the NIC cannot checksum it, and later fragments carry no
+             * TCP header at all. */
+            if (ip4->next_proto_id != IPPROTO_TCP
+                || (ip4->fragment_offset & rte_cpu_to_be_16(frag_bits))) {
+                continue;
+            }
+        } else if (dl_type != htons(ETH_TYPE_IPV6)
+                   || ((const struct ipv6_hdr *) l3hdr)->proto
+                      != IPPROTO_TCP) {
+            continue;
+        }
+
+        tcp = (struct tcp_header *) (data + pkt->l4_ofs);
+        pkt->mbuf.l2_len = pkt->l3_ofs;
+        pkt->mbuf.l3_len = pkt->l4_ofs - pkt->l3_ofs;
+        pkt->mbuf.ol_flags |= PKT_TX_TCP_CKSUM
+                              | (is_v4 ? PKT_TX_IPV4 : PKT_TX_IPV6);
+
+        /* Seed the TCP checksum field with the pseudo-header checksum;
+         * the NIC completes it over the TCP header and payload. */
+        tcp->tcp_csum = is_v4
+            ? rte_ipv4_phdr_cksum((const struct ipv4_hdr *) l3hdr,
+                                  pkt->mbuf.ol_flags)
+            : rte_ipv6_phdr_cksum((const struct ipv6_hdr *) l3hdr,
+                                  pkt->mbuf.ol_flags);
+    }
+}
+
/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'. Takes ownership of
* 'pkts', even in case of failure.
*
@@ -1803,6 +1892,11 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
/* We have to do a copy for now */
memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
dp_packet_data(batch->packets[i]), size);
+ if (batch->packets[i]->mbuf.ol_flags & PKT_TX_TCP_CKSUM) {
+ pkts[newcnt]->l2_len = batch->packets[i]->mbuf.l2_len;
+ pkts[newcnt]->l3_len = batch->packets[i]->mbuf.l3_len;
+ pkts[newcnt]->ol_flags = batch->packets[i]->mbuf.ol_flags;
+ }
rte_pktmbuf_data_len(pkts[newcnt]) = size;
rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
@@ -1861,6 +1955,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
}
+ if (dev->hw_ol_features & NETDEV_TX_CHECKSUM_OFFLOAD) {
+ netdev_prepare_tx_csum(batch->packets, batch->count);
+ }
+
if (OVS_UNLIKELY(!may_steal ||
batch->packets[0]->source != DPBUF_DPDK)) {
struct netdev *netdev = &dev->up;
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 9bb828f..826cf15 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3415,10 +3415,11 @@
</column>
</group>
- <group title="Rx Checksum Offload Configuration">
+ <group title="Rx/TX Checksum Offload Configuration">
<p>
- The checksum validation on the incoming packets are performed on NIC
- using Rx checksum offload feature. Implemented only for <code>dpdk
+ Checksum validation of incoming packets and TCP checksum computation
+ for outgoing packets are performed on the NIC using the Rx/Tx checksum
+ offload features. Implemented only for <code>dpdk
</code>physical interfaces.
</p>
@@ -3427,6 +3428,12 @@
Set to <code>false</code> to disble Rx checksum offloading on <code>
dpdk</code>physical ports. By default, Rx checksum offload is enabled.
</column>
+
+ <column name="options" key="tx-checksum-offload"
+ type='{"type": "boolean"}'>
+ Set to <code>true</code> to enable Tx checksum offloading on <code>
+ dpdk</code> physical ports. By default, Tx checksum offload is disabled.
+ </column>
</group>
<group title="Common Columns">
--
1.8.3.1
More information about the dev
mailing list