[ovs-dev] [PATCH v5 15/16] conntrack: Track ICMP type and code.

Daniele Di Proietto diproiettod at vmware.com
Wed Jul 27 00:58:15 UTC 2016


>From the connection tracker perspective, an ICMP connection is a tuple
identified by source ip address, destination ip address and ICMP id.

While this allows basic ICMP traffic (pings) to work, it doesn't take
into account the icmp type: the connection tracker will allow
requests/replies in any directions.

This is improved by making the ICMP type and code part of the connection
tuple.  An ICMP echo request packet from A to B, will create a
connection that matches ICMP echo request from A to B and ICMP echo
replies from B to A.  The same is done for timestamp and info
request/replies, and for ICMPv6.

A new modules conntrack-icmp is implemented, to allow only "request"
types to create new connections.

Also, since they're tracked in both userspace and kernel
implementations, ICMP type and code are always printed in ct-dpif (a few
testcase are updated as a consequence).

Reported-by: Subramani Paramasivam <subramani.paramasivam at wipro.com>
Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
Acked-by: Joe Stringer <joe at ovn.org>
---
 lib/automake.mk         |   1 +
 lib/conntrack-icmp.c    | 105 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/conntrack-private.h |  11 ++++-
 lib/conntrack.c         |  62 ++++++++++++++++++++++++----
 lib/conntrack.h         |   2 +
 lib/ct-dpif.c           |  24 ++++-------
 lib/ct-dpif.h           |   3 +-
 lib/netlink-conntrack.c |   2 +-
 tests/system-traffic.at |  12 +++---
 9 files changed, 188 insertions(+), 34 deletions(-)
 create mode 100644 lib/conntrack-icmp.c

diff --git a/lib/automake.mk b/lib/automake.mk
index b1da53d..4110e5f 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -49,6 +49,7 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/compiler.h \
 	lib/connectivity.c \
 	lib/connectivity.h \
+	lib/conntrack-icmp.c \
 	lib/conntrack-private.h \
 	lib/conntrack-tcp.c \
 	lib/conntrack-other.c \
diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c
new file mode 100644
index 0000000..40fd1d8
--- /dev/null
+++ b/lib/conntrack-icmp.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2015, 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+
+#include "conntrack-private.h"
+#include "dp-packet.h"
+
+enum icmp_state {
+    ICMPS_FIRST,
+    ICMPS_REPLY,
+};
+
+struct conn_icmp {
+    struct conn up;
+    enum icmp_state state;
+};
+
+static const enum ct_timeout icmp_timeouts[] = {
+    [ICMPS_FIRST] = CT_TM_ICMP_FIRST,
+    [ICMPS_REPLY] = CT_TM_ICMP_REPLY,
+};
+
+static struct conn_icmp *
+conn_icmp_cast(const struct conn *conn)
+{
+    return CONTAINER_OF(conn, struct conn_icmp, up);
+}
+
+static enum ct_update_res
+icmp_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
+                 struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
+{
+    struct conn_icmp *conn = conn_icmp_cast(conn_);
+
+    if (reply && conn->state != ICMPS_REPLY) {
+        conn->state = ICMPS_REPLY;
+    }
+
+    conn_update_expiration(ctb, &conn->up, icmp_timeouts[conn->state], now);
+
+    return CT_UPDATE_VALID;
+}
+
+static bool
+icmp4_valid_new(struct dp_packet *pkt)
+{
+    struct icmp_header *icmp = dp_packet_l4(pkt);
+
+    return icmp->icmp_type == ICMP4_ECHO_REQUEST
+           || icmp->icmp_type == ICMP4_INFOREQUEST
+           || icmp->icmp_type == ICMP4_TIMESTAMP;
+}
+
+static bool
+icmp6_valid_new(struct dp_packet *pkt)
+{
+    struct icmp6_header *icmp6 = dp_packet_l4(pkt);
+
+    return icmp6->icmp6_type == ICMP6_ECHO_REQUEST;
+}
+
+static struct conn *
+icmp_new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt OVS_UNUSED,
+               long long now)
+{
+    struct conn_icmp *conn;
+
+    conn = xzalloc(sizeof *conn);
+    conn->state = ICMPS_FIRST;
+
+    conn_init_expiration(ctb, &conn->up, icmp_timeouts[conn->state], now);
+
+    return &conn->up;
+}
+
+struct ct_l4_proto ct_proto_icmp4 = {
+    .new_conn = icmp_new_conn,
+    .valid_new = icmp4_valid_new,
+    .conn_update = icmp_conn_update,
+};
+
+struct ct_l4_proto ct_proto_icmp6 = {
+    .new_conn = icmp_new_conn,
+    .valid_new = icmp6_valid_new,
+    .conn_update = icmp_conn_update,
+};
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index df32525..013f19f 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -40,7 +40,14 @@ struct ct_addr {
 
 struct ct_endpoint {
     struct ct_addr addr;
-    ovs_be16 port;
+    union {
+        ovs_be16 port;
+        struct {
+            ovs_be16 icmp_id;
+            uint8_t icmp_type;
+            uint8_t icmp_code;
+        };
+    };
 };
 
 /* Changes to this structure need to be reflected in conn_key_hash() */
@@ -83,6 +90,8 @@ struct ct_l4_proto {
 
 extern struct ct_l4_proto ct_proto_tcp;
 extern struct ct_l4_proto ct_proto_other;
+extern struct ct_l4_proto ct_proto_icmp4;
+extern struct ct_l4_proto ct_proto_icmp6;
 
 extern long long ct_timeout_val[];
 
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 15a9582..8e6c826 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -78,8 +78,8 @@ static void *clean_thread_main(void *f_);
 static struct ct_l4_proto *l4_protos[] = {
     [IPPROTO_TCP] = &ct_proto_tcp,
     [IPPROTO_UDP] = &ct_proto_other,
-    [IPPROTO_ICMP] = &ct_proto_other,
-    [IPPROTO_ICMPV6] = &ct_proto_other,
+    [IPPROTO_ICMP] = &ct_proto_icmp4,
+    [IPPROTO_ICMPV6] = &ct_proto_icmp6,
 };
 
 long long ct_timeout_val[] = {
@@ -693,6 +693,29 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size)
 static inline bool extract_l4(struct conn_key *key, const void *data,
                               size_t size, bool *related, const void *l3);
 
+static uint8_t
+reverse_icmp_type(uint8_t type)
+{
+    switch (type) {
+    case ICMP4_ECHO_REQUEST:
+        return ICMP4_ECHO_REPLY;
+    case ICMP4_ECHO_REPLY:
+        return ICMP4_ECHO_REQUEST;
+
+    case ICMP4_TIMESTAMP:
+        return ICMP4_TIMESTAMPREPLY;
+    case ICMP4_TIMESTAMPREPLY:
+        return ICMP4_TIMESTAMP;
+
+    case ICMP4_INFOREQUEST:
+        return ICMP4_INFOREPLY;
+    case ICMP4_INFOREPLY:
+        return ICMP4_INFOREQUEST;
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+
 /* If 'related' is not NULL and the function is processing an ICMP
  * error packet, extract the l3 and l4 fields from the nested header
  * instead and set *related to true.  If 'related' is NULL we're
@@ -715,8 +738,13 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
     case ICMP4_TIMESTAMPREPLY:
     case ICMP4_INFOREQUEST:
     case ICMP4_INFOREPLY:
+        if (icmp->icmp_code != 0) {
+            return false;
+        }
         /* Separate ICMP connection: identified using id */
-        key->src.port = key->dst.port = icmp->icmp_fields.echo.id;
+        key->src.icmp_id = key->dst.icmp_id = icmp->icmp_fields.echo.id;
+        key->src.icmp_type = icmp->icmp_type;
+        key->dst.icmp_type = reverse_icmp_type(icmp->icmp_type);
         break;
     case ICMP4_DST_UNREACH:
     case ICMP4_TIME_EXCEEDED:
@@ -766,6 +794,19 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
     return true;
 }
 
+static uint8_t
+reverse_icmp6_type(uint8_t type)
+{
+    switch (type) {
+    case ICMP6_ECHO_REQUEST:
+        return ICMP6_ECHO_REPLY;
+    case ICMP6_ECHO_REPLY:
+        return ICMP6_ECHO_REQUEST;
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+
 /* If 'related' is not NULL and the function is processing an ICMP
  * error packet, extract the l3 and l4 fields from the nested header
  * instead and set *related to true.  If 'related' is NULL we're
@@ -786,8 +827,13 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
     switch (icmp6->icmp6_type) {
     case ICMP6_ECHO_REQUEST:
     case ICMP6_ECHO_REPLY:
+        if (icmp6->icmp6_code != 0) {
+            return false;
+        }
         /* Separate ICMP connection: identified using id */
-        key->src.port = key->dst.port = *(ovs_be16 *) (icmp6 + 1);
+        key->src.icmp_id = key->dst.icmp_id = *(ovs_be16 *) (icmp6 + 1);
+        key->src.icmp_type = icmp6->icmp6_type;
+        key->dst.icmp_type = reverse_icmp6_type(icmp6->icmp6_type);
         break;
     case ICMP6_DST_UNREACH:
     case ICMP6_PACKET_TOO_BIG:
@@ -980,6 +1026,7 @@ static void
 conn_key_reverse(struct conn_key *key)
 {
     struct ct_endpoint tmp;
+
     tmp = key->src;
     key->src = key->dst;
     key->dst = tmp;
@@ -1079,10 +1126,9 @@ conn_key_to_tuple(const struct conn_key *key, struct ct_dpif_tuple *tuple)
                                      key->dl_type);
 
     if (key->nw_proto == IPPROTO_ICMP || key->nw_proto == IPPROTO_ICMPV6) {
-        tuple->icmp_id = key->src.port;
-        /* ICMP type and code are not tracked */
-        tuple->icmp_type = 0;
-        tuple->icmp_code = 0;
+        tuple->icmp_id = key->src.icmp_id;
+        tuple->icmp_type = key->src.icmp_type;
+        tuple->icmp_code = key->src.icmp_code;
     } else {
         tuple->src_port = key->src.port;
         tuple->dst_port = key->dst.port;
diff --git a/lib/conntrack.h b/lib/conntrack.h
index 8802d35..d188105 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -130,6 +130,8 @@ static inline void ct_lock_destroy(struct ct_lock *lock)
     CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
     CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
     CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
+    CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
+    CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
 
 /* The smallest of the above values: it is used as an upper bound for the
  * interval between two rounds of cleanup of expired entries */
diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index 63ca498..6c6a07b 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -136,14 +136,14 @@ ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds,
     ct_dpif_format_ipproto(ds, entry->tuple_orig.ip_proto);
 
     ds_put_cstr(ds, ",orig=(");
-    ct_dpif_format_tuple(ds, &entry->tuple_orig, verbose);
+    ct_dpif_format_tuple(ds, &entry->tuple_orig);
     if (print_stats) {
         ct_dpif_format_counters(ds, &entry->counters_orig);
     }
     ds_put_cstr(ds, ")");
 
     ds_put_cstr(ds, ",reply=(");
-    ct_dpif_format_tuple(ds, &entry->tuple_reply, verbose);
+    ct_dpif_format_tuple(ds, &entry->tuple_reply);
     if (print_stats) {
         ct_dpif_format_counters(ds, &entry->counters_reply);
     }
@@ -179,7 +179,7 @@ ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds,
     ct_dpif_format_helper(ds, ",helper=", &entry->helper);
     if (verbose && entry->tuple_master.l3_type != 0) {
         ds_put_cstr(ds, ",master=(");
-        ct_dpif_format_tuple(ds, &entry->tuple_master, verbose);
+        ct_dpif_format_tuple(ds, &entry->tuple_master);
         ds_put_cstr(ds, ")");
     }
 }
@@ -227,17 +227,10 @@ ct_dpif_format_timestamp(struct ds *ds,
 }
 
 static void
-ct_dpif_format_tuple_icmp(struct ds *ds, const struct ct_dpif_tuple *tuple,
-                          bool verbose)
+ct_dpif_format_tuple_icmp(struct ds *ds, const struct ct_dpif_tuple *tuple)
 {
-    if (verbose) {
-        ds_put_format(ds, ",id=%u,type=%u,code=%u",
-                      ntohs(tuple->icmp_id),
-                      tuple->icmp_type,
-                      tuple->icmp_code);
-    } else {
-        ds_put_format(ds, ",id=%u", ntohs(tuple->icmp_id));
-    }
+    ds_put_format(ds, ",id=%u,type=%u,code=%u", ntohs(tuple->icmp_id),
+                  tuple->icmp_type, tuple->icmp_code);
 }
 
 static void
@@ -248,8 +241,7 @@ ct_dpif_format_tuple_tp(struct ds *ds, const struct ct_dpif_tuple *tuple)
 }
 
 void
-ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple,
-                     bool verbose)
+ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple)
 {
     if (tuple->l3_type == AF_INET) {
         ds_put_format(ds, "src="IP_FMT",dst="IP_FMT,
@@ -268,7 +260,7 @@ ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple,
 
     if (tuple->ip_proto == IPPROTO_ICMP
         || tuple->ip_proto == IPPROTO_ICMPV6) {
-        ct_dpif_format_tuple_icmp(ds, tuple, verbose);
+        ct_dpif_format_tuple_icmp(ds, tuple);
     } else {
         ct_dpif_format_tuple_tp(ds, tuple);
     }
diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
index 47663c7..5da3c2c 100644
--- a/lib/ct-dpif.h
+++ b/lib/ct-dpif.h
@@ -183,7 +183,6 @@ int ct_dpif_flush(struct dpif *, const uint16_t *zone);
 void ct_dpif_entry_uninit(struct ct_dpif_entry *);
 void ct_dpif_format_entry(const struct ct_dpif_entry *, struct ds *,
                           bool verbose, bool print_stats);
-void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *,
-                          bool verbose);
+void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *);
 
 #endif /* CT_DPIF_H */
diff --git a/lib/netlink-conntrack.c b/lib/netlink-conntrack.c
index d83b09a..aab5b1f 100644
--- a/lib/netlink-conntrack.c
+++ b/lib/netlink-conntrack.c
@@ -497,7 +497,7 @@ nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
             struct ds ds;
 
             ds_init(&ds);
-            ct_dpif_format_tuple(&ds, tuple, true);
+            ct_dpif_format_tuple(&ds, tuple);
 
             VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds));
             ds_destroy(&ds);
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index fd8b918..5155d20 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -634,7 +634,7 @@ NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0],
 ])
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>)
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0)
 ])
 
 AT_CHECK([ovs-appctl dpctl/flush-conntrack])
@@ -686,7 +686,7 @@ NS_CHECK_EXEC([at_ns0], [ping6 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING], [0],
 ])
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fc00::2)], [0], [dnl
-icmpv6,orig=(src=fc00::1,dst=fc00::2,id=<cleared>),reply=(src=fc00::2,dst=fc00::1,id=<cleared>)
+icmpv6,orig=(src=fc00::1,dst=fc00::2,id=<cleared>,type=128,code=0),reply=(src=fc00::2,dst=fc00::1,id=<cleared>,type=129,code=0)
 ])
 
 OVS_TRAFFIC_VSWITCHD_STOP
@@ -1027,8 +1027,8 @@ dnl (again) HTTP requests from root namespace to  p0 should work fine.
 AT_CHECK([wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | grep "zone"], [0], [dnl
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>),zone=1
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>),zone=2
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=1
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=2
 tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=1,protoinfo=(state=<cleared>)
 tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=2,protoinfo=(state=<cleared>)
 ])
@@ -1138,8 +1138,8 @@ dnl (again) HTTP requests from root namespace to p0 should work fine.
 AT_CHECK([wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
 
 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | grep "zone"], [0], [dnl
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>),zone=1
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>),zone=65534
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=1
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=65534
 tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=1,protoinfo=(state=<cleared>)
 tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=65534,protoinfo=(state=<cleared>)
 ])
-- 
2.8.1




More information about the dev mailing list