[ovs-dev] [PATCH net-next] net: openvswitch: Add support to lookup invalid packet in ct action.

nusiddiq at redhat.com nusiddiq at redhat.com
Tue Oct 6 08:33:55 UTC 2020


From: Numan Siddique <nusiddiq at redhat.com>

For a TCP packet which is part of an existing committed connection,
nf_conntrack_in() will return an error and set skb->_nfct to NULL if the
packet is outside the TCP window. The ct action for this packet will set
the ct_state to +inv, which is as expected.

But a controller cannot add an OVS flow as

table=21,priority=100,ct_state=+inv, actions=drop

to drop such packets. That is because when the ct action is executed on
other packets which are not part of existing committed connections,
ct_state can also be set to invalid. A few such cases are:
   - ICMP reply packets.
   - TCP SYN/ACK packets during connection establishment.
   - SCTP INIT ACK, COOKIE ACK, DATA and DATA ACK packets.

To distinguish between an invalid packet that is part of a committed
connection and others, this patch introduces a new ct attribute,
OVS_CT_ATTR_LOOKUP_INV. If this is set in the ct action (without commit),
the action tries to find the ct entry and, if present, sets the ct_state
to +inv,+trk and also sets the mark and labels associated with the
connection.

With this, a controller can add flows like

....
....
table=20,ip, action=ct(table=21, lookup_invalid)
table=21,priority=100,ct_state=+inv+trk,ct_label=0x2/0x2 actions=drop
table=21,ip, actions=resubmit(,22)
....
....

CC: Pravin B Shelar <pshelar at ovn.org>
Signed-off-by: Numan Siddique <nusiddiq at redhat.com>
---

RFC -> PATCH
------
  * Changed the patch from RFC to a formal one. No other changes.

 include/uapi/linux/openvswitch.h |  4 +++
 net/openvswitch/conntrack.c      | 47 ++++++++++++++++++++++++--------
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 8300cc29dec8..db942986c5b7 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -768,6 +768,9 @@ struct ovs_action_hash {
  * respectively.  Remaining bits control the changes for which an event is
  * delivered on the NFNLGRP_CONNTRACK_UPDATE group.
  * @OVS_CT_ATTR_TIMEOUT: Variable length string defining conntrack timeout.
+ * @OVS_CT_ATTR_LOOKUP_INV: If present, looks up and sets the state, mark and
+ * labels for an invalid packet (eg. out of tcp window) if it is part of
+ * committed connection.
  */
 enum ovs_ct_attr {
 	OVS_CT_ATTR_UNSPEC,
@@ -782,6 +785,7 @@ enum ovs_ct_attr {
 	OVS_CT_ATTR_EVENTMASK,  /* u32 mask of IPCT_* events. */
 	OVS_CT_ATTR_TIMEOUT,	/* Associate timeout with this connection for
 				 * fine-grain timeout tuning. */
+	OVS_CT_ATTR_LOOKUP_INV, /* No argument. */
 	__OVS_CT_ATTR_MAX
 };
 
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e6fe26a9c892..a6f96d9b4452 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -62,6 +62,7 @@ struct ovs_conntrack_info {
 	u8 nat : 3;                 /* enum ovs_ct_nat */
 	u8 force : 1;
 	u8 have_eventmask : 1;
+	u8 lookup_invalid : 1;
 	u16 family;
 	u32 eventmask;              /* Mask of 1 << IPCT_*. */
 	struct md_mark mark;
@@ -601,12 +602,13 @@ ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
  *
  * Must be called with rcu_read_lock.
  *
- * On success, populates skb->_nfct and returns the connection.  Returns NULL
- * if there is no existing entry.
+ * On success, populates skb->_nfct if 'skb_set_ct' is true and returns the
+ * connection.  Returns NULL if there is no existing entry.
  */
 static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
-		     u8 l3num, struct sk_buff *skb, bool natted)
+		     u8 l3num, struct sk_buff *skb, bool natted,
+		     bool skb_set_ct)
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
@@ -636,14 +638,17 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
-	/* Inverted packet tuple matches the reverse direction conntrack tuple,
-	 * select the other tuplehash to get the right 'ctinfo' bits for this
-	 * packet.
-	 */
-	if (natted)
-		h = &ct->tuplehash[!h->tuple.dst.dir];
+	if (skb_set_ct) {
+		/* Inverted packet tuple matches the reverse direction
+		 * conntrack tuple, select the other tuplehash to get the
+		 * right 'ctinfo' bits for this packet.
+		 */
+		if (natted)
+			h = &ct->tuplehash[!h->tuple.dst.dir];
+
+		nf_ct_set(skb, ct, ovs_ct_get_info(h));
+	}
 
-	nf_ct_set(skb, ct, ovs_ct_get_info(h));
 	return ct;
 }
 
@@ -669,7 +674,7 @@ struct nf_conn *ovs_ct_executed(struct net *net,
 	if (*ct_executed || (!key->ct_state && info->force)) {
 		ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
 					  !!(key->ct_state &
-					  OVS_CS_F_NAT_MASK));
+					  OVS_CS_F_NAT_MASK), true);
 	}
 
 	return ct;
@@ -1033,6 +1038,20 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 		    ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
 			return -EINVAL;
 		}
+	} else if (info->lookup_invalid) {
+		/* nf_conntrack_in() sets skb->_nfct to NULL if the packet is
+		 * invalid (eg. out of tcp window) even if it belongs to
+		 * an existing connection. Check if there is an existing entry
+		 * and if so, update the key with the mark and ct_labels.
+		 */
+		ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
+					  false, false);
+		if (ct) {
+			u8 state;
+
+			state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
+			__ovs_ct_update_key(key, state, &info->zone, ct);
+		}
 	}
 
 	return 0;
@@ -1602,6 +1621,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
 			}
 			break;
 #endif
+		case OVS_CT_ATTR_LOOKUP_INV:
+			info->lookup_invalid = true;
+			break;
 
 		default:
 			OVS_NLERR(log, "Unknown conntrack attr (%d)",
@@ -1819,6 +1841,9 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
 		if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout))
 			return -EMSGSIZE;
 	}
+	if (ct_info->lookup_invalid &&
+	    nla_put_flag(skb, OVS_CT_ATTR_LOOKUP_INV))
+		return -EMSGSIZE;
 
 #if IS_ENABLED(CONFIG_NF_NAT)
 	if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
-- 
2.26.2



More information about the dev mailing list