[ovs-dev] [PATCH v6] Use TPACKET_V3 to accelerate veth for userspace datapath

William Tu u9012063 at gmail.com
Thu Mar 12 18:34:53 UTC 2020


On Wed, Mar 11, 2020 at 6:14 PM Yi Yang (杨燚)-云服务集团 <yangyi01 at inspur.com> wrote:
>
> > >
> > > TPACKET_V3 can support TSO, but its performance isn't good because
> > > of
> > > TPACKET_V3 kernel implementation issue, so it falls back to
> >
> > What's the implementation issue? If we use latest kernel, does the
> > issue still exist?
> >
> > [Yi Yang] Per my check, the issue is the kernel can't feed enough
> > packets to tpacket_recv, so in many cases, no packets received, no 32
> > packets available, but for original non-tpacket case, one recv will
> > get 32 packets in most cases, throughput is about more than twice for
> > veth, for tap case, it is more than three times, I read kernel source
> > code, but I can't find root cause, I'll check from tpacket maintainer.
> >
> > > recvmmsg in case userspace-tso-enable is set to true, but its
> > > performance is better than recvmmsg in case userspace-tso-enable is
> > > set to false, so just use TPACKET_V3 in that case.
> > >
> > > Signed-off-by: Yi Yang <yangyi01 at inspur.com>
> > > Co-authored-by: William Tu <u9012063 at gmail.com>
> > > Signed-off-by: William Tu <u9012063 at gmail.com>
> > > ---
> > > diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
> > > new file mode 100644 index 0000000..e20aacc
> > > --- /dev/null
> > > +++ b/include/linux/if_packet.h
> >
> > if OVS_CHECK_LINUX_TPACKET returns false, can we simply fall back to
> > recvmmsg?
> > So this is not needed?
> >
> > [Yi Yang] As you said, ovs support Linux kernel 3.10.0 or above, so no
> > that case existing, isn't it?
>
> I mean if kernel supports it AND if_packet.h header exists, then we enable it.
> If kernel supports it AND if_packet.h header does not exist, then just use recvmmsg.
>
> [Yi Yang] I'm confused here, Ben told me it should be built even if  if_packet.h isn't there, that is why I added if_packet,h in include/linux/if_packet.h, I mean tpacket_v3 code should be built in this case.
>

My concern is that since there is not a lot of performance improvement,
we don't necessary need to use tpacket_v3. Or we should use tpacket_v3
as an optional configuration, but not default.

I remove the if_linux.h in the following diff, and travis works ok.
https://travis-ci.org/github/williamtu/ovs-travis/builds/661631098

---
diff --git a/acinclude.m4 b/acinclude.m4
index 1488deda0371..4b11085ab190 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -1086,12 +1086,14 @@ dnl OVS_CHECK_LINUX_TPACKET
 dnl
 dnl Configure Linux TPACKET.
 AC_DEFUN([OVS_CHECK_LINUX_TPACKET], [
-  AC_COMPILE_IFELSE([
-    AC_LANG_PROGRAM([#include <linux/if_packet.h>], [
-        struct tpacket3_hdr x =  { 0 };
-    ])],
-    [AC_DEFINE([HAVE_TPACKET_V3], [1],
-    [Define to 1 if struct tpacket3_hdr is available.])])
+  AC_CHECK_HEADER([linux/if_packet.h],
+    [AC_COMPILE_IFELSE([
+      AC_LANG_PROGRAM([#include <linux/if_packet.h>], [
+          struct tpacket3_hdr x =  { 0 };
+      ])],
+      [AC_DEFINE([HAVE_TPACKET_V3], [1],
+      [Define to 1 if struct tpacket3_hdr is available.])])],
+    [])
 ])

 dnl Checks for buggy strtok_r.
diff --git a/include/linux/automake.mk b/include/linux/automake.mk
index a659e65abe27..8f063f482e15 100644
--- a/include/linux/automake.mk
+++ b/include/linux/automake.mk
@@ -1,5 +1,4 @@
 noinst_HEADERS += \
-       include/linux/if_packet.h \
        include/linux/netlink.h \
        include/linux/netfilter/nf_conntrack_sctp.h \
        include/linux/pkt_cls.h \
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
deleted file mode 100644
index e20aaccb1e32..000000000000
--- a/include/linux/if_packet.h
+++ /dev/null
@@ -1,128 +0,0 @@
-#ifndef __LINUX_IF_PACKET_WRAPPER_H
-#define __LINUX_IF_PACKET_WRAPPER_H 1
-
-#ifdef HAVE_TPACKET_V3
-#include_next <linux/if_packet.h>
-#else
-#define HAVE_TPACKET_V3 1
-
-struct sockaddr_pkt {
-        unsigned short  spkt_family;
-        unsigned char   spkt_device[14];
-        uint16_t        spkt_protocol;
-};
-
-struct sockaddr_ll {
-        unsigned short  sll_family;
-        uint16_t        sll_protocol;
-        int             sll_ifindex;
-        unsigned short  sll_hatype;
-        unsigned char   sll_pkttype;
-        unsigned char   sll_halen;
-        unsigned char   sll_addr[8];
-};
-
-/* Packet types */
-#define PACKET_HOST                     0 /* To us                */
-#define PACKET_OTHERHOST                3 /* To someone else    */
-#define PACKET_LOOPBACK                 5 /* MC/BRD frame looped back */
-
-/* Packet socket options */
-#define PACKET_RX_RING                  5
-#define PACKET_VERSION                 10
-#define PACKET_TX_RING                 13
-#define PACKET_VNET_HDR                15
-
-/* Rx ring - header status */
-#define TP_STATUS_KERNEL                0
-#define TP_STATUS_USER            (1 << 0)
-#define TP_STATUS_VLAN_VALID      (1 << 4) /* auxdata has valid tp_vlan_tci */
-#define TP_STATUS_VLAN_TPID_VALID (1 << 6) /* auxdata has valid tp_vlan_tpid */
-
-/* Tx ring - header status */
-#define TP_STATUS_SEND_REQUEST    (1 << 0)
-#define TP_STATUS_SENDING         (1 << 1)
-
-struct tpacket_hdr {
-    unsigned long tp_status;
-    unsigned int tp_len;
-    unsigned int tp_snaplen;
-    unsigned short tp_mac;
-    unsigned short tp_net;
-    unsigned int tp_sec;
-    unsigned int tp_usec;
-};
-
-#define TPACKET_ALIGNMENT 16
-#define TPACKET_ALIGN(x) (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
-
-struct tpacket_hdr_variant1 {
-    uint32_t tp_rxhash;
-    uint32_t tp_vlan_tci;
-    uint16_t tp_vlan_tpid;
-    uint16_t tp_padding;
-};
-
-struct tpacket3_hdr {
-    uint32_t  tp_next_offset;
-    uint32_t  tp_sec;
-    uint32_t  tp_nsec;
-    uint32_t  tp_snaplen;
-    uint32_t  tp_len;
-    uint32_t  tp_status;
-    uint16_t  tp_mac;
-    uint16_t  tp_net;
-    /* pkt_hdr variants */
-    union {
-        struct tpacket_hdr_variant1 hv1;
-    };
-    uint8_t  tp_padding[8];
-};
-
-struct tpacket_bd_ts {
-    unsigned int ts_sec;
-    union {
-        unsigned int ts_usec;
-        unsigned int ts_nsec;
-    };
-};
-
-struct tpacket_hdr_v1 {
-    uint32_t block_status;
-    uint32_t num_pkts;
-    uint32_t offset_to_first_pkt;
-    uint32_t blk_len;
-    uint64_t __attribute__((aligned(8))) seq_num;
-    struct tpacket_bd_ts ts_first_pkt, ts_last_pkt;
-};
-
-union tpacket_bd_header_u {
-    struct tpacket_hdr_v1 bh1;
-};
-
-struct tpacket_block_desc {
-    uint32_t version;
-    uint32_t offset_to_priv;
-    union tpacket_bd_header_u hdr;
-};
-
-#define TPACKET3_HDRLEN \
-    (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
-
-enum tpacket_versions {
-    TPACKET_V1,
-    TPACKET_V2,
-    TPACKET_V3
-};
-
-struct tpacket_req3 {
-    unsigned int tp_block_size; /* Minimal size of contiguous block */
-    unsigned int tp_block_nr; /* Number of blocks */
-    unsigned int tp_frame_size; /* Size of frame */
-    unsigned int tp_frame_nr; /* Total number of frames */
-    unsigned int tp_retire_blk_tov; /* Timeout in msecs */
-    unsigned int tp_sizeof_priv; /* Offset to private data area */
-    unsigned int tp_feature_req_word;
-};
-#endif /* HAVE_TPACKET_V3 */
-#endif /* __LINUX_IF_PACKET_WRAPPER_H */
diff --git a/include/sparse/linux/if_packet.h b/include/sparse/linux/if_packet.h
index 0ac3fcefc895..3813892a0788 100644
--- a/include/sparse/linux/if_packet.h
+++ b/include/sparse/linux/if_packet.h
@@ -28,114 +28,4 @@ struct sockaddr_ll {
         unsigned char   sll_addr[8];
 };

-/* Packet types */
-#define PACKET_HOST                     0 /* To us                */
-#define PACKET_OTHERHOST                3 /* To someone else   */
-#define PACKET_LOOPBACK                 5 /* MC/BRD frame looped back */
-
-/* Packet socket options */
-#define PACKET_RX_RING                  5
-#define PACKET_VERSION                 10
-#define PACKET_TX_RING                 13
-#define PACKET_VNET_HDR                15
-
-/* Rx ring - header status */
-#define TP_STATUS_KERNEL                0
-#define TP_STATUS_USER            (1 << 0)
-#define TP_STATUS_VLAN_VALID      (1 << 4) /* auxdata has valid tp_vlan_tci */
-#define TP_STATUS_VLAN_TPID_VALID (1 << 6) /* auxdata has valid tp_vlan_tpid */
-
-/* Tx ring - header status */
-#define TP_STATUS_SEND_REQUEST    (1 << 0)
-#define TP_STATUS_SENDING         (1 << 1)
-
-#define tpacket_hdr rpl_tpacket_hdr
-struct tpacket_hdr {
-    unsigned long tp_status;
-    unsigned int tp_len;
-    unsigned int tp_snaplen;
-    unsigned short tp_mac;
-    unsigned short tp_net;
-    unsigned int tp_sec;
-    unsigned int tp_usec;
-};
-
-#define TPACKET_ALIGNMENT 16
-#define TPACKET_ALIGN(x) (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
-
-#define tpacket_hdr_variant1 rpl_tpacket_hdr_variant1
-struct tpacket_hdr_variant1 {
-    uint32_t tp_rxhash;
-    uint32_t tp_vlan_tci;
-    uint16_t tp_vlan_tpid;
-    uint16_t tp_padding;
-};
-
-#define tpacket3_hdr rpl_tpacket3_hdr
-struct tpacket3_hdr {
-    uint32_t  tp_next_offset;
-    uint32_t  tp_sec;
-    uint32_t  tp_nsec;
-    uint32_t  tp_snaplen;
-    uint32_t  tp_len;
-    uint32_t  tp_status;
-    uint16_t  tp_mac;
-    uint16_t  tp_net;
-    /* pkt_hdr variants */
-    union {
-        struct tpacket_hdr_variant1 hv1;
-    };
-    uint8_t  tp_padding[8];
-};
-
-#define tpacket_bd_ts rpl_tpacket_bd_ts
-struct tpacket_bd_ts {
-    unsigned int ts_sec;
-    union {
-        unsigned int ts_usec;
-        unsigned int ts_nsec;
-    };
-};
-
-#define tpacket_hdr_v1 rpl_tpacket_hdr_v1
-struct tpacket_hdr_v1 {
-    uint32_t block_status;
-    uint32_t num_pkts;
-    uint32_t offset_to_first_pkt;
-    uint32_t blk_len;
-    uint64_t __attribute__((aligned(8))) seq_num;
-    struct tpacket_bd_ts ts_first_pkt, ts_last_pkt;
-};
-
-#define tpacket_bd_header_u rpl_tpacket_bd_header_u
-union tpacket_bd_header_u {
-    struct tpacket_hdr_v1 bh1;
-};
-
-#define tpacket_block_desc rpl_tpacket_block_desc
-struct tpacket_block_desc {
-    uint32_t version;
-    uint32_t offset_to_priv;
-    union tpacket_bd_header_u hdr;
-};
-
-#define TPACKET3_HDRLEN \
-    (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
-
-enum rpl_tpacket_versions {
-    TPACKET_V1,
-    TPACKET_V2,
-    TPACKET_V3
-};
-
-#define tpacket_req3 rpl_tpacket_req3
-struct tpacket_req3 {
-    unsigned int tp_block_size; /* Minimal size of contiguous block */
-    unsigned int tp_block_nr; /* Number of blocks */
-    unsigned int tp_frame_size; /* Size of frame */
-    unsigned int tp_frame_nr; /* Total number of frames */
-    unsigned int tp_retire_blk_tov; /* Timeout in msecs */
-    unsigned int tp_sizeof_priv; /* Offset to private data area */
-    unsigned int tp_feature_req_word;
-};
 #endif


More information about the dev mailing list