[ovs-dev] [PATCH 4/5] datapath: Add stateful NAT action

Thomas Graf tgraf at noironetworks.com
Fri Sep 26 22:00:16 UTC 2014


WIP
---
 datapath/actions.c                                |  95 +++++++++++++++
 datapath/flow.h                                   |   6 +
 datapath/flow_netlink.c                           | 139 ++++++++++++++++++++++
 datapath/linux/compat/include/linux/openvswitch.h |  54 +++++++++
 4 files changed, 294 insertions(+)

diff --git a/datapath/actions.c b/datapath/actions.c
index 05b465c..b3196b7 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -33,6 +33,7 @@
 #include <net/checksum.h>
 #include <net/dsfield.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat_core.h>
 #include <net/sctp/checksum.h>
 
 #include "datapath.h"
@@ -782,6 +783,96 @@ static int conntrack(struct datapath *dp, struct sk_buff *skb,
 	return 0;
 }
 
+/* Set up the NAT binding of @ct for manip type info->type unless it has
+ * already been set up.
+ *
+ * Returns a netfilter verdict, not an errno: NF_ACCEPT if the binding already
+ * exists, otherwise whatever nf_nat_setup_info() returns.  ovs_nat() treats
+ * anything other than NF_ACCEPT as failure, so the early exit must not
+ * return 0.
+ */
+static int ovs_nat_handle_ct_new(struct nf_conn *ct, struct ovs_nat_info *info)
+{
+	struct nf_nat_range range = { .flags = NF_NAT_RANGE_MAP_IPS };
+
+	/* Seen it before?  This can happen for loopback, retrans,
+	 * or local packets.
+	 */
+	if (nf_nat_initialized(ct, info->type))
+		return NF_ACCEPT;
+
+	/* Action is set up to establish a new mapping */
+	if (info->range.flags & NF_NAT_RANGE_MAP_IPS)
+		return nf_nat_setup_info(ct, &info->range, info->type);
+
+	/* Force range to this IP; let proto decide mapping for
+	 * per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
+	 * Use reply in case it's already been mangled (eg local
+	 * packet).
+	 */
+	if (info->type == NF_NAT_MANIP_SRC)
+		range.min_addr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3;
+	else
+		range.min_addr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3;
+	range.max_addr = range.min_addr;
+
+	return nf_nat_setup_info(ct, &range, info->type);
+}
+
+/* Apply NAT action @info to @skb via its conntrack entry.  Returns 0 if the
+ * packet was translated or cannot be NATed, -EINVAL if netfilter rejects it.
+ */
+static int ovs_nat(struct sk_buff *skb, struct ovs_nat_info *info)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	int hooknum, nh_off, err = -EINVAL;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || nf_ct_is_untracked(ct)) {
+		WARN(1, "NAT: Untracked packet");
+		/* FIXME: Bump counter? */
+		return 0;
+	}
+
+	if (!nf_ct_nat_ext_add(ct))
+		return 0;
+
+	nh_off = skb_network_offset(skb);
+	skb_pull(skb, nh_off);
+	/* FIXME: COW */
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		/* FIXME: Handle ICMP, see nf_nat_ipv4_fn() */
+		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+	case IP_CT_NEW:
+		if (ovs_nat_handle_ct_new(ct, info) != NF_ACCEPT)
+			goto push;
+		break;
+
+	case IP_CT_ESTABLISHED:
+	case IP_CT_ESTABLISHED_REPLY:
+		/* Nothing to set up; just translate the packet below. */
+		break;
+
+	default:
+		WARN_ON(1);
+		goto push;
+	}
+
+	hooknum = info->type == NF_NAT_MANIP_SRC ? NF_INET_LOCAL_IN :
+						   NF_INET_LOCAL_OUT;
+
+	/* The action loop treats any non-zero return as an error. */
+	if (nf_nat_packet(ct, ctinfo, hooknum, skb) == NF_ACCEPT)
+		err = 0;
+push:
+	skb_push(skb, nh_off);
+	return err;
+}
+
 static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
 			      const struct nlattr *nested_attr)
 {
@@ -956,6 +1047,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		case OVS_ACTION_ATTR_CONNTRACK:
 			err = conntrack(dp, skb, key, nla_data(a));
 			break;
+
+		case OVS_ACTION_ATTR_NAT:
+			err = ovs_nat(skb, nla_data(a));
+			break;
 		}
 
 		if (unlikely(err)) {
diff --git a/datapath/flow.h b/datapath/flow.h
index ce74958..5f2c0bb 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -35,6 +35,7 @@
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
 
 struct sk_buff;
 
@@ -69,6 +70,11 @@ struct ovs_tunnel_info {
 					FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
 					   opt_len)
 
+struct ovs_nat_info {
+	__u32 type;			/* NAT manip type (OVS_NAT_TYPE_*) */
+	struct nf_nat_range range;	/* Range handed to nf_nat_setup_info() */
+};
+
 static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
 					    __be32 saddr, __be32 daddr,
 					    u8 tos, u8 ttl,
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 75dc87f..ebf7bd7 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -43,6 +43,7 @@
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
 #include <linux/rculist.h>
+#include <linux/netfilter/nf_nat.h>
 #include <net/geneve.h>
 #include <net/ip.h>
 #include <net/ip_tunnels.h>
@@ -1806,6 +1807,106 @@ static int validate_userspace(const struct nlattr *attr)
 	return 0;
 }
 
+/* Validate a nested OVS_ACTION_ATTR_NAT attribute and append the resulting
+ * struct ovs_nat_info to @sfa as a flat OVS_ACTION_ATTR_NAT action.  @key is
+ * not used.  Returns the result of add_action(), -EINVAL for an unknown or
+ * mis-sized attribute and -ERANGE for a bad NAT type or IP address length.
+ * Only the OVS_NAT_FLAGS bits of OVS_NAT_ATTR_FLAGS are accepted.
+ */
+static int validate_and_copy_nat(const struct nlattr *attr,
+				const struct sw_flow_key *key,
+				struct sw_flow_actions **sfa)
+{
+	struct ovs_nat_info nat_info;
+	union nf_inet_addr *addr;
+	struct nlattr *a;
+	int rem;
+
+	BUILD_BUG_ON(OVS_NAT_FLAG_PROTO_RAND != NF_NAT_RANGE_PROTO_RANDOM);
+	BUILD_BUG_ON(OVS_NAT_FLAG_PERSISTENT != NF_NAT_RANGE_PERSISTENT);
+	BUILD_BUG_ON(OVS_NAT_FLAG_PROTO_FULL_RAND != NF_NAT_RANGE_PROTO_RANDOM_FULLY);
+
+	memset(&nat_info, 0, sizeof(nat_info));
+
+	nla_for_each_nested(a, attr, rem) {
+		static const u32 ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1] = {
+			[OVS_NAT_ATTR_TYPE] = sizeof(u32),
+			[OVS_NAT_ATTR_IP_MIN] = -1,
+			[OVS_NAT_ATTR_IP_MAX] = -1,
+			[OVS_NAT_ATTR_PROTO_MIN] = sizeof(u16),
+			[OVS_NAT_ATTR_PROTO_MAX] = sizeof(u16),
+			[OVS_NAT_ATTR_FLAGS] = sizeof(u32),
+		};
+		int type = nla_type(a);
+
+		if (type > OVS_NAT_ATTR_MAX) {
+			OVS_NLERR("Unknown nat attribute (type=%d, max=%d).\n",
+				  type, OVS_NAT_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (ovs_nat_attr_lens[type] != nla_len(a) &&
+		    ovs_nat_attr_lens[type] != -1) {
+			OVS_NLERR("NAT attribute type has unexpected "
+				  " length (type=%d, length=%d, expected=%d).\n",
+				  type, nla_len(a), ovs_nat_attr_lens[type]);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NAT_ATTR_TYPE:
+			nat_info.type = nla_get_u32(a);
+			if (nat_info.type > OVS_NAT_TYPE_MAX) {
+				OVS_NLERR("NAT type %d out of range 0..%d\n",
+					  nat_info.type, OVS_NAT_TYPE_MAX);
+				return -ERANGE;
+			}
+			break;
+
+		case OVS_NAT_ATTR_IP_MIN:
+		case OVS_NAT_ATTR_IP_MAX:
+			if (nla_len(a) != sizeof(struct in_addr) &&
+			    nla_len(a) != sizeof(struct in6_addr))
+				return -ERANGE;
+
+			/* Each bound is stored in its own end of the range. */
+			addr = type == OVS_NAT_ATTR_IP_MIN ?
+				&nat_info.range.min_addr :
+				&nat_info.range.max_addr;
+			nla_memcpy(addr, a, sizeof(*addr));
+			nat_info.range.flags |= NF_NAT_RANGE_MAP_IPS;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_MIN:
+			nat_info.range.min_proto.all = nla_get_u16(a);
+			nat_info.range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_MAX:
+			nat_info.range.max_proto.all = nla_get_u16(a);
+			nat_info.range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+			break;
+
+		case OVS_NAT_ATTR_FLAGS:
+			/* Accept only the flag bits exposed to userspace. */
+			nat_info.range.flags |= nla_get_u32(a) & OVS_NAT_FLAGS;
+			break;
+
+		default:
+			OVS_NLERR("Unknown nat attribute (%d).\n", type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR("NAT attribute has %d unknown bytes.\n", rem);
+		return -EINVAL;
+	}
+
+	return add_action(sfa, OVS_ACTION_ATTR_NAT, &nat_info,
+			  sizeof(nat_info));
+}
+
 static int copy_action(const struct nlattr *from,
 		       struct sw_flow_actions **sfa)
 {
@@ -1845,6 +1946,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
 			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
 			[OVS_ACTION_ATTR_CONNTRACK] = (u32)-1,
+			[OVS_ACTION_ATTR_NAT] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -1957,6 +2059,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			skip_copy = true;
 			break;
 
+		case OVS_ACTION_ATTR_NAT:
+			err = validate_and_copy_nat(a, key, sfa);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -2079,6 +2188,30 @@ static int conntrack_action_to_attr(const struct nlattr *attr, struct sk_buff *s
 	return 0;
 }
 
+/* Dump a flat OVS_ACTION_ATTR_NAT action into @skb as nested OVS_NAT_ATTR_*
+ * attributes; only the OVS_NAT_FLAGS bits of the range flags are reported.
+ * Returns 0 on success or -EMSGSIZE if an attribute cannot be added.
+ */
+static int nat_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+{
+	const struct ovs_nat_info *info = nla_data(attr);
+	struct nlattr *start = nla_nest_start(skb, OVS_ACTION_ATTR_NAT);
+
+	if (!start)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, OVS_NAT_ATTR_TYPE, info->type) ||
+	    nla_put_u32(skb, OVS_NAT_ATTR_IP_MIN, info->range.min_addr.ip) ||
+	    nla_put_u32(skb, OVS_NAT_ATTR_IP_MAX, info->range.max_addr.ip) ||
+	    nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, info->range.min_proto.all) ||
+	    nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX, info->range.max_proto.all) ||
+	    nla_put_u32(skb, OVS_NAT_ATTR_FLAGS, info->range.flags & OVS_NAT_FLAGS))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, start);
+	return 0;
+}
+
 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 {
 	const struct nlattr *a;
@@ -2106,6 +2239,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_NAT:
+			err = nat_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index f3654de..b65efc6 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -592,6 +592,57 @@ enum ovs_conntrack_attr {
 #define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
 
 /**
+ * enum ovs_nat_type - Supported NAT modes
+ */
+enum ovs_nat_type {
+	OVS_NAT_TYPE_SRC,	/* Source NAT */
+	OVS_NAT_TYPE_DST,	/* Destination NAT */
+	__OVS_NAT_TYPE_MAX,
+};
+
+#define OVS_NAT_TYPE_MAX (__OVS_NAT_TYPE_MAX - 1)
+
+/**
+ * enum ovs_nat_flag - Supported NAT flags
+ * @OVS_NAT_FLAG_PROTO_RAND: Pseudo random hash based L4 port mapping (MD5)
+ * @OVS_NAT_FLAG_PERSISTENT: Persistent IP mapping across reboots
+ * @OVS_NAT_FLAG_PROTO_FULL_RAND: Fully randomized L4 port mapping
+ *
+ * NOTE: Each value must equal its NF_NAT_RANGE_* counterpart in
+ * <linux/netfilter/nf_nat.h>; the datapath asserts this with BUILD_BUG_ON().
+ */
+enum ovs_nat_flag {
+	OVS_NAT_FLAG_PROTO_RAND		= 0x4,
+	OVS_NAT_FLAG_PERSISTENT		= 0x8,
+	OVS_NAT_FLAG_PROTO_FULL_RAND	= 0x10,
+};
+
+#define OVS_NAT_FLAGS (OVS_NAT_FLAG_PROTO_RAND | OVS_NAT_FLAG_PERSISTENT | \
+		       OVS_NAT_FLAG_PROTO_FULL_RAND)
+
+/**
+ * enum ovs_nat_attr - Attributes for %OVS_ACTION_ATTR_NAT action.
+ * @OVS_NAT_ATTR_TYPE: u32 NAT type (enum ovs_nat_type)
+ * @OVS_NAT_ATTR_IP_MIN: struct in_addr or struct in6_addr, range lower bound
+ * @OVS_NAT_ATTR_IP_MAX: struct in_addr or struct in6_addr, range upper bound
+ * @OVS_NAT_ATTR_PROTO_MIN: u16 L4 protocol specific lower boundary (port)
+ * @OVS_NAT_ATTR_PROTO_MAX: u16 L4 protocol specific upper boundary (port)
+ * @OVS_NAT_ATTR_FLAGS: u32 NAT flags (OVS_NAT_FLAG_*), other bits are ignored
+ */
+enum ovs_nat_attr {
+	OVS_NAT_ATTR_UNSPEC,
+	OVS_NAT_ATTR_TYPE,
+	OVS_NAT_ATTR_IP_MIN,
+	OVS_NAT_ATTR_IP_MAX,
+	OVS_NAT_ATTR_PROTO_MIN,
+	OVS_NAT_ATTR_PROTO_MAX,
+	OVS_NAT_ATTR_FLAGS,
+	__OVS_NAT_ATTR_MAX,
+};
+
+#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
+
+/**
  * enum ovs_action_attr - Action types.
  *
  * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
@@ -623,6 +674,8 @@ enum ovs_conntrack_attr {
  * %ETH_P_MPLS if the resulting MPLS label stack is not empty.  If there
  * is no MPLS label stack, as determined by ethertype, no action is taken.
  * @OVS_ACTION_ATTR_CONNTRACK: Track the connection.
+ * @OVS_ACTION_ATTR_NAT: Perform L3 network address translation (NAT) on
+ * the packet using the Netfilter subsystem.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -646,6 +699,7 @@ enum ovs_action_attr {
 				       * The data must be zero for the unmasked
 				       * bits. */
 	OVS_ACTION_ATTR_CONNTRACK,    /* One nested OVS_CT_ATTR_* */
+	OVS_ACTION_ATTR_NAT,          /* Nested OVS_NAT_ATTR_* */
 	__OVS_ACTION_ATTR_MAX
 };
 
-- 
1.9.3




More information about the dev mailing list