[ovs-dev] [RFC PATCH] userspace: Enable tunnel with TSO.
William Tu
u9012063 at gmail.com
Wed Feb 3 18:12:55 UTC 2021
Currently, when 'userspace-tso-enable=true' is set, tunnel test cases
fail due to incorrect checksums in both the inner and outer headers.
This patch recalculates the checksum before a packet is output to
a port (tunnel and tap), so that the receiver sees a correct checksum.
Consider the following cases:
1) veth -> ovs -> veth, and 2) tap -> ovs -> tap
No need to recalculate the checksum because the vnet header carries
the offload information.
3) decap: vxlan tunnel -> br-underlay -> br-overlay
The inner packet is sent to br-overlay (which is a tap).
Need to fix the inner header's csum.
4) encap: br-overlay -> br-underlay -> vxlan tunnel
Fix the inner csum before pushing the outer header.
I added iperf tests, and the vxlan and geneve tests pass:
$ make check-system-tso TESTSUITEFLAGS="-k vxlan"
$ make check-system-tso TESTSUITEFLAGS="-k geneve"
While TCP works over the tunnel, a TCP sender that sends very large
packets will fail. I have to segment the inner TCP packet before
pushing the outer tunnel header.
Signed-off-by: William Tu <u9012063 at gmail.com>
---
lib/netdev-linux.c | 2 +-
lib/netdev-native-tnl.c | 11 ++++++++++-
lib/netdev.c | 18 ++++++------------
lib/packets.c | 34 ++++++++++++++++++++++++++++++++++
lib/packets.h | 1 +
tests/system-tap.at | 3 +++
tests/system-traffic.at | 9 +++++++++
7 files changed, 64 insertions(+), 14 deletions(-)
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 6be23dbeed57..bb365b3b0da3 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1446,7 +1446,6 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
netdev_get_name(netdev_));
continue;
}
-
dp_packet_batch_add(batch, pkt);
}
@@ -1604,6 +1603,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
int error;
if (tso) {
+ packet_csum_tcpudp(packet);
netdev_linux_prepend_vnet_hdr(packet, mtu);
}
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index b89dfdd52a86..003c78a151f8 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -43,6 +43,7 @@
#include "seq.h"
#include "unaligned.h"
#include "unixctl.h"
+#include "userspace-tso.h"
#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(native_tnl);
@@ -153,6 +154,12 @@ netdev_tnl_push_ip_header(struct dp_packet *packet,
struct ip_header *ip;
struct ovs_16aligned_ip6_hdr *ip6;
+ if (userspace_tso_enabled()) {
+ /* Calculate inner header's checksum before pushing outer header.
+ * (Assume the device does not support tnl checksum) */
+ packet_csum_tcpudp(packet);
+ }
+
eth = dp_packet_push_uninit(packet, size);
*ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
@@ -189,7 +196,9 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
return NULL;
}
- if (udp->udp_csum) {
+ /* 'udp->udp_csum' will be the pseudo header csum when userspace
+ * TSO is enabled. Skip the validation. */
+ if (udp->udp_csum && !userspace_tso_enabled()) {
if (OVS_UNLIKELY(!dp_packet_l4_checksum_valid(packet))) {
uint32_t csum;
if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
diff --git a/lib/netdev.c b/lib/netdev.c
index 91e91955c09b..bdf0000c45e9 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -960,18 +960,12 @@ netdev_push_header(const struct netdev *netdev,
size_t i, size = dp_packet_batch_size(batch);
DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
- if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet)
- || dp_packet_hwol_l4_mask(packet))) {
- COVERAGE_INC(netdev_push_header_drops);
- dp_packet_delete(packet);
- VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is "
- "not supported: packet dropped",
- netdev_get_name(netdev));
- } else {
- netdev->netdev_class->push_header(netdev, packet, data);
- pkt_metadata_init(&packet->md, data->out_port);
- dp_packet_batch_refill(batch, packet, i);
- }
+ /* Tunneling packet with HW offload flags is not supported. */
+ *dp_packet_ol_flags_ptr(packet) = 0;
+
+ netdev->netdev_class->push_header(netdev, packet, data);
+ pkt_metadata_init(&packet->md, data->out_port);
+ dp_packet_batch_refill(batch, packet, i);
}
return 0;
diff --git a/lib/packets.c b/lib/packets.c
index 4a7643c5dd3a..b0bb283acdfa 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1887,3 +1887,37 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
}
}
}
+
+void
+packet_csum_tcpudp(struct dp_packet *p)
+{
+ struct eth_header *eth;
+ struct ip_header *ip;
+ struct tcp_header *tcp;
+ struct udp_header *udp;
+ uint32_t pseudo_hdr_csum;
+ uint8_t l4proto;
+ size_t l4_size;
+
+ eth = dp_packet_eth(p);
+ if (eth->eth_type != htons(ETH_TYPE_IP)) {
+ return;
+ }
+
+ ip = dp_packet_l3(p);
+ l4proto = ip->ip_proto;
+ l4_size = dp_packet_l4_size(p);
+
+ if (l4proto == IPPROTO_TCP) {
+ pseudo_hdr_csum = packet_csum_pseudoheader(ip);
+ tcp = dp_packet_l4(p);
+ tcp->tcp_csum = 0;
+ tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum, tcp, l4_size));
+
+ } else if (l4proto == IPPROTO_UDP) {
+ pseudo_hdr_csum = packet_csum_pseudoheader(ip);
+ udp = dp_packet_l4(p);
+ udp->udp_csum = 0;
+ udp->udp_csum = csum_finish(csum_continue(pseudo_hdr_csum, udp, l4_size));
+ }
+}
diff --git a/lib/packets.h b/lib/packets.h
index 481bc22fa1fe..1bea8c504811 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -1634,6 +1634,7 @@ void packet_put_ra_prefix_opt(struct dp_packet *,
ovs_be32 preferred_lifetime,
const ovs_be128 router_prefix);
uint32_t packet_csum_pseudoheader(const struct ip_header *);
+void packet_csum_tcpudp(struct dp_packet *p);
void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
#define DNS_HEADER_LEN 12
diff --git a/tests/system-tap.at b/tests/system-tap.at
index 871a3bda4fcc..be108c59b3c9 100644
--- a/tests/system-tap.at
+++ b/tests/system-tap.at
@@ -29,6 +29,9 @@ NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0],
OVS_START_L7([at_ns1], [http])
NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+NETNS_DAEMONIZE([at_ns0], [iperf -s], [iperf.pid])
+NS_CHECK_EXEC([at_ns1], [iperf -c 10.1.1.1 -t1 1> /dev/null], [0])
+
OVS_TRAFFIC_VSWITCHD_STOP(["/.*ethtool command ETHTOOL_G.*/d"])
AT_CLEANUP
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index fb5b9a36d283..ed014953ca4e 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -248,6 +248,7 @@ dnl Okay, now check the overlay with different packet sizes
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
+
NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
@@ -255,6 +256,10 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
+NETNS_DAEMONIZE([at_ns0], [iperf -s], [iperf.pid])
+AT_CHECK([ethtool -K br0 tso off &> /dev/null], [0])
+AT_CHECK([iperf -c 10.1.1.1 -t1 1> /dev/null], [0])
+
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
@@ -571,6 +576,10 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
+NETNS_DAEMONIZE([at_ns0], [iperf -s], [iperf.pid])
+AT_CHECK([ethtool -K br0 tso off &> /dev/null], [0])
+AT_CHECK([iperf -c 10.1.1.1 -t1 1> /dev/null], [0])
+
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
--
2.7.4
More information about the dev
mailing list