[ovs-dev] [RFC PATCH] conntrack: extract L4 information for SCTP

Paolo Valerio pvalerio at redhat.com
Mon Mar 8 11:20:00 UTC 2021


Since a27d70a89 ("conntrack: add generic IP protocol support"), all
unrecognized IP protocols get handled using the ct_proto_other ops
and are managed as L3 using 3-tuples.

This patch stores L4 information for SCTP in the conn_key so that
multiple conn instances, instead of one with ports zeroed, will be
created when there are multiple SCTP connections between two hosts.
It also performs a crc32c check when not offloaded, and adds SCTP to
pat_enabled.

With this patch, given two SCTP associations between two hosts, and
given for example the following rules (ARP omitted):

in_port=tap0,ip,action=ct(commit,zone=1,nat(src=10.1.1.240:12345-12346)),tap1
in_port=tap1,ct_state=-trk,ip,action=ct(table=0,zone=1,nat)
in_port=tap1,ct_state=+trk,ct_zone=1,ip,action=tap0

the following entries will be created:

sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=55884,dport=5201),reply=(src=10.1.1.1,dst=10.1.1.240,sport=5201,dport=12345),zone=1
sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=59874,dport=5202),reply=(src=10.1.1.1,dst=10.1.1.240,sport=5202,dport=12346),zone=1

instead of:

sctp,orig=(src=192.168.100.100,dst=10.1.1.1,sport=0,dport=0),reply=(src=10.1.1.1,dst=10.1.1.240,sport=0,dport=0),zone=1

Signed-off-by: Paolo Valerio <pvalerio at redhat.com>
---
 lib/conntrack.c |   97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/packets.h   |   18 ++++++++++
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 99198a601..6eddb562b 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -28,8 +28,10 @@
 #include "conntrack-tp.h"
 #include "coverage.h"
 #include "csum.h"
+#include "crc32c.h"
 #include "ct-dpif.h"
 #include "dp-packet.h"
+#include "unaligned.h"
 #include "flow.h"
 #include "netdev.h"
 #include "odp-netlink.h"
@@ -725,6 +727,9 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
         } else if (conn->key.nw_proto == IPPROTO_UDP) {
             struct udp_header *uh = dp_packet_l4(pkt);
             packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
+        } else if (conn->key.nw_proto == IPPROTO_SCTP) {
+            struct sctp_header *sh = dp_packet_l4(pkt);
+            packet_set_sctp_port(pkt, conn->rev_key.dst.port, sh->sctp_dst);
         }
     } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
@@ -733,6 +738,9 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
         } else if (conn->key.nw_proto == IPPROTO_UDP) {
             struct udp_header *uh = dp_packet_l4(pkt);
             packet_set_udp_port(pkt, uh->udp_src, conn->rev_key.src.port);
+        } else if (conn->key.nw_proto == IPPROTO_SCTP) {
+            struct sctp_header *sh = dp_packet_l4(pkt);
+            packet_set_sctp_port(pkt, sh->sctp_src, conn->rev_key.src.port);
         }
     }
 }
@@ -783,6 +791,9 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
         } else if (conn->key.nw_proto == IPPROTO_UDP) {
             struct udp_header *uh = dp_packet_l4(pkt);
             packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
+        } else if (conn->key.nw_proto == IPPROTO_SCTP) {
+            struct sctp_header *sh = dp_packet_l4(pkt);
+            packet_set_sctp_port(pkt, sh->sctp_src, conn->key.src.port);
         }
     } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
@@ -791,6 +802,9 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
         } else if (conn->key.nw_proto == IPPROTO_UDP) {
             struct udp_header *uh = dp_packet_l4(pkt);
             packet_set_udp_port(pkt, conn->key.dst.port, uh->udp_dst);
+        } else if (conn->key.nw_proto == IPPROTO_SCTP) {
+            struct sctp_header *sh = dp_packet_l4(pkt);
+            packet_set_sctp_port(pkt, conn->key.dst.port, sh->sctp_dst);
         }
     }
 }
@@ -807,6 +821,10 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
             struct udp_header *uh_in = dp_packet_l4(pkt);
             packet_set_udp_port(pkt, conn->key.src.port,
                                 uh_in->udp_dst);
+        } else if (conn->key.nw_proto == IPPROTO_SCTP) {
+            struct sctp_header *sh_in = dp_packet_l4(pkt);
+            packet_set_sctp_port(pkt, conn->key.src.port,
+                                 sh_in->sctp_dst);
         }
     } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
         if (conn->key.nw_proto == IPPROTO_TCP) {
@@ -817,6 +835,10 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
             struct udp_header *uh_in = dp_packet_l4(pkt);
             packet_set_udp_port(pkt, uh_in->udp_src,
                                 conn->key.dst.port);
+        } else if (conn->key.nw_proto == IPPROTO_SCTP) {
+            struct sctp_header *sh_in = dp_packet_l4(pkt);
+            packet_set_sctp_port(pkt, sh_in->sctp_src,
+                                 conn->key.dst.port);
         }
     }
 }
@@ -1680,6 +1702,26 @@ checksum_valid(const struct conn_key *key, const void *data, size_t size,
     }
 }
 
+/* Checks the CRC32c carried in the SCTP header at 'data' ('size' bytes). */
+static inline bool
+sctp_checksum_valid(const void *data, size_t size)
+{
+    struct sctp_header *sh = (struct sctp_header *) data;
+    ovs_be32 received = get_16aligned_be32(&sh->sctp_csum);
+
+    /* Compute the CRC with the checksum field cleared, then put the
+     * received value back so the packet is left as it was found. */
+    put_16aligned_be32(&sh->sctp_csum, 0);
+    ovs_be32 computed = crc32c(data, size);
+    put_16aligned_be32(&sh->sctp_csum, received);
+
+    if (received != computed) {
+        COVERAGE_INC(conntrack_l4csum_err);
+        return false;
+    }
+    return true;
+}
+
 static inline bool
 check_l4_tcp(const struct conn_key *key, const void *data, size_t size,
              const void *l3, bool validate_checksum)
@@ -1716,6 +1758,39 @@ check_l4_udp(const struct conn_key *key, const void *data, size_t size,
            || (validate_checksum ? checksum_valid(key, data, size, l3) : true);
 }
 
+/* True if 'size' bytes at 'sh' are an SCTP header followed by whole chunks. */
+static inline bool
+sctp_check_len(const struct sctp_header *sh, size_t size)
+{
+    const struct sctp_chunk_header *sch;
+    size_t s;
+
+    if (size < SCTP_HEADER_LEN) {
+        return false;
+    }
+    FOR_EACH_SCTP_CHUNK(sh, sch, s, size) {
+        /* RFC 4960: the Length field covers the whole chunk (Type, Flags,
+         * Length and Value), so it can never be below the chunk header size.
+         * A tail too short to hold a chunk header is rejected before
+         * 'sch->length' is read, so nothing past 'size' is accessed. */
+        if (size - s < sizeof(*sch) || ntohs(sch->length) < sizeof(*sch)) {
+            return false;
+        }
+    }
+
+    return (s == size);
+}
+
+/* Validates the SCTP chunk layout and, if requested, the checksum. */
+static inline bool
+check_l4_sctp(const void *data, size_t size, bool validate_checksum)
+{
+    if (OVS_UNLIKELY(!sctp_check_len(data, size))) {
+        return false;
+    }
+    return !validate_checksum || sctp_checksum_valid(data, size);
+}
+
 static inline bool
 check_l4_icmp(const void *data, size_t size, bool validate_checksum)
 {
@@ -1766,6 +1841,22 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size,
     return key->src.port && key->dst.port;
 }
 
+/* Fills 'key' with the SCTP ports at 'data'; false if short or a port is 0. */
+static inline bool
+extract_l4_sctp(struct conn_key *key, const void *data, size_t size,
+                size_t *chk_len)
+{
+    const struct sctp_header *sh = data;
+    size_t min_len = chk_len ? *chk_len : SCTP_HEADER_LEN;
+
+    if (OVS_UNLIKELY(size < min_len)) {
+        return false;
+    }
+    key->src.port = sh->sctp_src;
+    key->dst.port = sh->sctp_dst;
+    return key->src.port && key->dst.port;   /* Port 0 is invalid. */
+}
+
 static inline bool extract_l4(struct conn_key *key, const void *data,
                               size_t size, bool *related, const void *l3,
                               bool validate_checksum, size_t *chk_len);
@@ -1981,6 +2072,9 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
         return (!related || check_l4_udp(key, data, size, l3,
                 validate_checksum))
                && extract_l4_udp(key, data, size, chk_len);
+    } else if (key->nw_proto == IPPROTO_SCTP) {
+        return (!related || check_l4_sctp(data, size, validate_checksum))
+               && extract_l4_sctp(key, data, size, chk_len);
     } else if (key->dl_type == htons(ETH_TYPE_IP)
                && key->nw_proto == IPPROTO_ICMP) {
         return (!related || check_l4_icmp(data, size, validate_checksum))
@@ -2277,7 +2371,8 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
           ? true : false;
     union ct_addr first_addr = ct_addr;
     bool pat_enabled = conn->key.nw_proto == IPPROTO_TCP ||
-                       conn->key.nw_proto == IPPROTO_UDP;
+                       conn->key.nw_proto == IPPROTO_UDP ||
+                       conn->key.nw_proto == IPPROTO_SCTP;
 
     while (true) {
         if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
diff --git a/lib/packets.h b/lib/packets.h
index 481bc22fa..3225d4028 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -842,6 +842,24 @@ struct sctp_header {
 };
 BUILD_ASSERT_DECL(SCTP_HEADER_LEN == sizeof(struct sctp_header));
 
+#define SCTP_CHUNK_HEADER_LEN 4
+struct sctp_chunk_header {
+    uint8_t type;               /* Chunk Type. */
+    uint8_t flags;              /* Chunk Flags. */
+    ovs_be16 length;            /* Chunk Length: whole chunk, in bytes. */
+};
+BUILD_ASSERT_DECL(SCTP_CHUNK_HEADER_LEN == sizeof(struct sctp_chunk_header));
+
+/* Helpers to walk the chunks that follow the SCTP common header 'sh'. */
+#define SCTP_NEXT_CHUNK(sh, off) \
+    ALIGNED_CAST(struct sctp_chunk_header *, (uint8_t *) (sh) + (off))
+#define FOR_EACH_SCTP_CHUNK(sh, sch, offset, size) \
+    for ((offset) = sizeof(struct sctp_header), \
+         (sch) = SCTP_NEXT_CHUNK(sh, offset); \
+         (offset) < (size); \
+         (offset) += (ntohs((sch)->length) + 3) & ~3, \
+         (sch) = SCTP_NEXT_CHUNK(sh, offset))
+
 #define UDP_HEADER_LEN 8
 struct udp_header {
     ovs_be16 udp_src;



More information about the dev mailing list