[ovs-dev] [PATCH 1/2] conntrack: Do not create new connections from ICMP errors.
Darrell Ball
dball at vmware.com
Fri Dec 23 01:40:00 UTC 2016
I tested an alternative diff for conntrack.c part below
In summary:
1) I think the outermost selection should be based on the “conn” check in
process_one. This is the main decision point.
This is also makes it easier to modify going forward.
The change boils down to a check in “conn is not found” else case for maybe_related.
in which case we know it is an invalid case.
2) The other change was renaming “related” to “maybe_related” since the way we use that
field, it is only “maybe_related” not “definitely related”.
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 7c50a28..23f49f5 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -49,7 +49,7 @@ struct conn_lookup_ctx {
struct conn *conn;
uint32_t hash;
bool reply;
- bool related;
+ bool maybe_related;
};
static bool conn_key_extract(struct conntrack *, struct dp_packet *,
@@ -214,7 +214,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
uint16_t state = 0;
if (conn) {
- if (ctx->related) {
+ if (ctx->maybe_related) {
state |= CS_RELATED;
if (ctx->reply) {
state |= CS_REPLY_DIR;
@@ -247,7 +247,11 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
}
}
} else {
- conn = conn_not_found(ct, pkt, ctx, &state, commit, now);
+ if (ctx->maybe_related) {
+ state |= CS_INVALID;
+ } else {
+ conn = conn_not_found(ct, pkt, ctx, &state, commit, now);
+ }
}
write_ct_md(pkt, state, zone, conn ? conn->mark : 0,
@@ -722,7 +726,7 @@ reverse_icmp_type(uint8_t type)
* possible */
static inline int
extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
- bool *related)
+ bool *maybe_related)
{
const struct icmp_header *icmp = data;
@@ -758,7 +762,7 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
const char *l4;
bool ok;
- if (!related) {
+ if (!maybe_related) {
return false;
}
@@ -782,7 +786,7 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
ok = extract_l4(key, l4, tail - l4, NULL, l3);
if (ok) {
conn_key_reverse(key);
- *related = true;
+ *maybe_related = true;
}
return ok;
}
@@ -813,7 +817,7 @@ reverse_icmp6_type(uint8_t type)
* possible */
static inline bool
extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
- bool *related)
+ bool *maybe_related)
{
const struct icmp6_header *icmp6 = data;
@@ -846,7 +850,7 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
const char *l4 = NULL;
bool ok;
- if (!related) {
+ if (!maybe_related) {
return false;
}
@@ -872,7 +876,7 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
ok = extract_l4(key, l4, tail - l4, NULL, l3);
if (ok) {
conn_key_reverse(key);
- *related = true;
+ *maybe_related = true;
}
return ok;
}
@@ -893,23 +897,23 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
* If 'related' is NULL, it means that we're already parsing a header nested
* in an ICMP error. In this case, we skip checksum and length validation. */
static inline bool
-extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
- const void *l3)
+extract_l4(struct conn_key *key, const void *data, size_t size,
+ bool *maybe_related, const void *l3)
{
if (key->nw_proto == IPPROTO_TCP) {
- return (!related || check_l4_tcp(key, data, size, l3))
+ return (!maybe_related || check_l4_tcp(key, data, size, l3))
&& extract_l4_tcp(key, data, size);
} else if (key->nw_proto == IPPROTO_UDP) {
- return (!related || check_l4_udp(key, data, size, l3))
+ return (!maybe_related || check_l4_udp(key, data, size, l3))
&& extract_l4_udp(key, data, size);
} else if (key->dl_type == htons(ETH_TYPE_IP)
&& key->nw_proto == IPPROTO_ICMP) {
- return (!related || check_l4_icmp(data, size))
- && extract_l4_icmp(key, data, size, related);
+ return (!maybe_related || check_l4_icmp(data, size))
+ && extract_l4_icmp(key, data, size, maybe_related);
} else if (key->dl_type == htons(ETH_TYPE_IPV6)
&& key->nw_proto == IPPROTO_ICMPV6) {
- return (!related || check_l4_icmp6(key, data, size, l3))
- && extract_l4_icmp6(key, data, size, related);
+ return (!maybe_related || check_l4_icmp6(key, data, size, l3))
+ && extract_l4_icmp6(key, data, size, maybe_related);
} else {
return false;
}
@@ -975,7 +979,7 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
}
if (ok) {
- if (extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3)) {
+ if (extract_l4(&ctx->key, l4, tail - l4, &ctx->maybe_related, l3)) {
ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
return true;
}
(END)
On 12/20/16, 12:25 PM, "ovs-dev-bounces at openvswitch.org on behalf of Daniele Di Proietto" <ovs-dev-bounces at openvswitch.org on behalf of diproiettod at vmware.com> wrote:
ICMP error packets (e.g. destination unreachable messages) are
considered 'related' to another connection and are treated as part of
that.
However:
* We shouldn't create new entries in the connection table if the
original connection is not found. This is consistent with what the
kernel does.
* We certainly shouldn't call valid_new() on the packet, because
valid_new() assumes the packet l4 type (might be TCP, UDP or ICMP)
to be consistent with the conn_key nw_proto type.
Found by inspection.
Fixes: a489b16854b5("conntrack: New userspace connection tracker.")
Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
---
lib/conntrack.c | 50 +++++++++++++++++++++++++------------------------
tests/system-traffic.at | 27 +++++++++++++++-----------
2 files changed, 42 insertions(+), 35 deletions(-)
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 7c50a28..d459321 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -213,38 +213,40 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
struct conn *conn = ctx->conn;
uint16_t state = 0;
- if (conn) {
- if (ctx->related) {
+ if (ctx->related) {
+ if (conn) {
state |= CS_RELATED;
if (ctx->reply) {
state |= CS_REPLY_DIR;
}
} else {
- enum ct_update_res res;
+ state |= CS_INVALID;
+ }
+ } else if (conn) {
+ enum ct_update_res res;
- res = conn_update(conn, &ct->buckets[bucket], pkt,
- ctx->reply, now);
+ res = conn_update(conn, &ct->buckets[bucket], pkt,
+ ctx->reply, now);
- switch (res) {
- case CT_UPDATE_VALID:
- state |= CS_ESTABLISHED;
- if (ctx->reply) {
- state |= CS_REPLY_DIR;
- }
- break;
- case CT_UPDATE_INVALID:
- state |= CS_INVALID;
- break;
- case CT_UPDATE_NEW:
- ovs_list_remove(&conn->exp_node);
- hmap_remove(&ct->buckets[bucket].connections, &conn->node);
- atomic_count_dec(&ct->n_conn);
- delete_conn(conn);
- conn = conn_not_found(ct, pkt, ctx, &state, commit, now);
- break;
- default:
- OVS_NOT_REACHED();
+ switch (res) {
+ case CT_UPDATE_VALID:
+ state |= CS_ESTABLISHED;
+ if (ctx->reply) {
+ state |= CS_REPLY_DIR;
}
+ break;
+ case CT_UPDATE_INVALID:
+ state |= CS_INVALID;
+ break;
+ case CT_UPDATE_NEW:
+ ovs_list_remove(&conn->exp_node);
+ hmap_remove(&ct->buckets[bucket].connections, &conn->node);
+ atomic_count_dec(&ct->n_conn);
+ delete_conn(conn);
+ conn = conn_not_found(ct, pkt, ctx, &state, commit, now);
+ break;
+ default:
+ OVS_NOT_REACHED();
}
} else {
conn = conn_not_found(ct, pkt, ctx, &state, commit, now);
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index ffeca35..2a7575c 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -1527,12 +1527,8 @@ ADD_VETH(p1, at_ns1, br0, "172.16.0.2/24")
dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
AT_DATA([flows.txt], [dnl
-priority=1,action=drop
-priority=10,arp,action=normal
-priority=100,in_port=1,udp,ct_state=-trk,action=ct(commit,table=0)
-priority=100,in_port=1,ip,ct_state=+trk,actions=controller
-priority=100,in_port=2,ip,ct_state=-trk,action=ct(table=0)
-priority=100,in_port=2,ip,ct_state=+trk+rel+rpl,action=controller
+table=0,ip,action=ct(commit,table=1)
+table=1,ip,action=controller
])
AT_CHECK([ovs-ofctl --bundle replace-flows br0 flows.txt])
@@ -1541,22 +1537,31 @@ AT_CAPTURE_FILE([ofctl_monitor.log])
AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log])
dnl 1. Send an ICMP port unreach reply for port 8738, without any previous request
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 2 ct\(table=0\) 'f64c473528c9c6f54ecb72db080045c0003d2e8700004001f355ac100004ac1000030303553f0000000045000021317040004011b138ac100003ac10000411112222000d20966369616f0a'])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 2 resubmit\(,0\) 'f64c473528c9c6f54ecb72db080045c0003d2e8700004001f351ac100004ac1000030303da490000000045000021317040004011b138ac100003ac10000411112222000d20966369616f0a'])
dnl 2. Send and UDP packet to port 5555
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 1 ct\(commit,table=0\) 'c6f94ecb72dbe64c473528c9080045000021317040004011b138ac100001ac100002a28e15b3000d20966369616f0a'])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 1 resubmit\(,0\) 'c6f94ecb72dbe64c473528c9080045000021317040004011b138ac100001ac100002a28e15b3000d20966369616f0a'])
dnl 3. Send an ICMP port unreach reply for port 5555, related to the first packet
-AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 2 ct\(table=0\) 'e64c473528c9c6f94ecb72db080045c0003d2e8700004001f355ac100002ac1000010303553f0000000045000021317040004011b138ac100001ac100002a28e15b3000d20966369616f0a'])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 2 resubmit\(,0\) 'e64c473528c9c6f94ecb72db080045c0003d2e8700004001f355ac100002ac1000010303553f0000000045000021317040004011b138ac100001ac100002a28e15b3000d20966369616f0a'])
dnl Check this output. We only see the latter two packets, not the first.
AT_CHECK([cat ofctl_monitor.log], [0], [dnl
-NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=47 ct_state=new|trk,in_port=1 (via action) data_len=47 (unbuffered)
+NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=75 ct_state=inv|trk,in_port=2 (via action) data_len=75 (unbuffered)
+icmp,vlan_tci=0x0000,dl_src=c6:f5:4e:cb:72:db,dl_dst=f6:4c:47:35:28:c9,nw_src=172.16.0.4,nw_dst=172.16.0.3,nw_tos=192,nw_ecn=0,nw_ttl=64,icmp_type=3,icmp_code=3 icmp_csum:da49
+NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=47 ct_state=new|trk,in_port=1 (via action) data_len=47 (unbuffered)
udp,vlan_tci=0x0000,dl_src=e6:4c:47:35:28:c9,dl_dst=c6:f9:4e:cb:72:db,nw_src=172.16.0.1,nw_dst=172.16.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=41614,tp_dst=5555 udp_csum:2096
-NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=75 ct_state=rel|rpl|trk,in_port=2 (via action) data_len=75 (unbuffered)
+NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=75 ct_state=rel|rpl|trk,in_port=2 (via action) data_len=75 (unbuffered)
icmp,vlan_tci=0x0000,dl_src=c6:f9:4e:cb:72:db,dl_dst=e6:4c:47:35:28:c9,nw_src=172.16.0.2,nw_dst=172.16.0.1,nw_tos=192,nw_ecn=0,nw_ttl=64,icmp_type=3,icmp_code=3 icmp_csum:553f
])
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.1)], [0], [dnl
+udp,orig=(src=172.16.0.1,dst=172.16.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>)
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.3)], [0], [dnl
+])
+
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
--
2.10.2
_______________________________________________
dev mailing list
dev at openvswitch.org
https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DgICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=ylNAQ0BevlSeMuwMeI0axFFRJW1ntKUwjUGLpBUeVJk&s=iAb5W9gaXwKvkKUpZtnQIm2kaZdHYOU3vRiI6ILGKFU&e=
More information about the dev
mailing list