[ovs-dev] [PATCH 4/5] datapath: Add stateful NAT action
Thomas Graf
tgraf at noironetworks.com
Fri Sep 26 22:00:16 UTC 2014
WIP
---
datapath/actions.c | 95 +++++++++++++++
datapath/flow.h | 6 +
datapath/flow_netlink.c | 139 ++++++++++++++++++++++
datapath/linux/compat/include/linux/openvswitch.h | 54 +++++++++
4 files changed, 294 insertions(+)
diff --git a/datapath/actions.c b/datapath/actions.c
index 05b465c..b3196b7 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -33,6 +33,7 @@
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat_core.h>
#include <net/sctp/checksum.h>
#include "datapath.h"
@@ -782,6 +783,96 @@ static int conntrack(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+/* Set up the NAT binding on conntrack entry @ct as described by @info.
+ *
+ * Returns a netfilter verdict, not an errno: NF_ACCEPT when a binding for
+ * info->type is already in place, otherwise the verdict returned by
+ * nf_nat_setup_info().  The caller treats anything other than NF_ACCEPT as
+ * a failure.
+ */
+static int ovs_nat_handle_ct_new(struct nf_conn *ct, struct ovs_nat_info *info)
+{
+	int verdict;
+
+	/* Seen it before? This can happen for loopback, retrans,
+	 * or local packets.  The binding already exists, which is the
+	 * success case, so it must be reported as NF_ACCEPT; a plain 0
+	 * would be taken for a failure by the caller.
+	 */
+	if (nf_nat_initialized(ct, info->type))
+		return NF_ACCEPT;
+
+	if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
+		/* Action is set up to establish a new mapping */
+		verdict = nf_nat_setup_info(ct, &info->range, info->type);
+	} else {
+		/* Force range to this IP; let proto decide mapping for
+		 * per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
+		 * Use reply in case it's already been mangled (eg local
+		 * packet).
+		 */
+		union nf_inet_addr ip =
+			(info->type == NF_NAT_MANIP_SRC ?
+			 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
+			 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
+
+		struct nf_nat_range range = {
+			.flags = NF_NAT_RANGE_MAP_IPS,
+			.min_addr = ip,
+			.max_addr = ip,
+		};
+
+		verdict = nf_nat_setup_info(ct, &range, info->type);
+	}
+
+	return verdict;
+}
+
+/* Execute the NAT action described by @info on @skb.
+ *
+ * The packet must already carry a conntrack entry.  Returns 0 when the
+ * packet was translated, and also when it is passed through untouched
+ * because it is untracked or no NAT extension could be attached.  Returns
+ * -EINVAL when the binding could not be set up, the conntrack state is
+ * unexpected, or nf_nat_packet() did not accept the packet.
+ */
+static int ovs_nat(struct sk_buff *skb, struct ovs_nat_info *info)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_nat *nat;
+	struct nf_conn *ct;
+	int hooknum, nh_off, err;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || nf_ct_is_untracked(ct)) {
+		WARN(1, "NAT: Untracked packet");
+		/* FIXME: Bump counter? */
+		return 0;
+	}
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return 0;
+
+	/* Point skb->data at the network header while the NAT code runs;
+	 * this is undone at the 'push' label on every exit path below.
+	 */
+	nh_off = skb_network_offset(skb);
+	skb_pull(skb, nh_off);
+	/* FIXME: COW */
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		/* FIXME: Handle ICMP, see nf_nat_ipv4_fn() */
+
+		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+	case IP_CT_NEW:
+		if (ovs_nat_handle_ct_new(ct, info) != NF_ACCEPT) {
+			err = -EINVAL;
+			goto push;
+		}
+		break;
+
+	case IP_CT_ESTABLISHED:
+	case IP_CT_ESTABLISHED_REPLY:
+		/* The binding was set up by an earlier packet of this
+		 * connection; go straight to translating this one.
+		 */
+		break;
+
+	default:
+		WARN_ON(1);
+		err = -EINVAL;
+		goto push;
+	}
+
+	if (info->type == NF_NAT_MANIP_SRC)
+		hooknum = NF_INET_LOCAL_IN;
+	else
+		hooknum = NF_INET_LOCAL_OUT;
+
+	/* nf_nat_packet() hands back a netfilter verdict, while the caller
+	 * treats any non-zero return as an error, so translate the verdict
+	 * into an errno instead of returning it directly.
+	 */
+	if (nf_nat_packet(ct, ctinfo, hooknum, skb) == NF_ACCEPT)
+		err = 0;
+	else
+		err = -EINVAL;
+push:
+	skb_push(skb, nh_off);
+
+	return err;
+}
+
static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
const struct nlattr *nested_attr)
{
@@ -956,6 +1047,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_CONNTRACK:
err = conntrack(dp, skb, key, nla_data(a));
break;
+
+ case OVS_ACTION_ATTR_NAT:
+ err = ovs_nat(skb, nla_data(a));
+ break;
}
if (unlikely(err)) {
diff --git a/datapath/flow.h b/datapath/flow.h
index ce74958..5f2c0bb 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -35,6 +35,7 @@
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
struct sk_buff;
@@ -69,6 +70,11 @@ struct ovs_tunnel_info {
FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
opt_len)
+struct ovs_nat_info { /* Parsed OVS_ACTION_ATTR_NAT payload, appended to the flow actions by validate_and_copy_nat() */
+ __u32 type; /* Taken from OVS_NAT_ATTR_TYPE; passed to nf_nat_setup_info() as the manip type */
+ struct nf_nat_range range; /* Address/port bounds and NF_NAT_RANGE_* flags handed to nf_nat_setup_info() */
+};
+
static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be32 saddr, __be32 daddr,
u8 tos, u8 ttl,
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 75dc87f..ebf7bd7 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -43,6 +43,7 @@
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
+#include <linux/netfilter/nf_nat.h>
#include <net/geneve.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
@@ -1806,6 +1807,106 @@ static int validate_userspace(const struct nlattr *attr)
return 0;
}
+/* Validate a nested OVS_ACTION_ATTR_NAT attribute and append the parsed
+ * result, a struct ovs_nat_info, to @sfa as an OVS_ACTION_ATTR_NAT action.
+ *
+ * @attr: nested attribute holding OVS_NAT_ATTR_* entries.
+ * @key:  flow key of the flow being installed (currently unused).
+ * @sfa:  action list to append to.
+ *
+ * Returns the result of add_action() on success, -EINVAL for unknown or
+ * mis-sized attributes and trailing bytes, -ERANGE for an out-of-range NAT
+ * type or an address that is neither IPv4 nor IPv6 sized.
+ */
+static int validate_and_copy_nat(const struct nlattr *attr,
+				 const struct sw_flow_key *key,
+				 struct sw_flow_actions **sfa)
+{
+	struct ovs_nat_info nat_info;
+	bool have_ip_max = false;
+	struct nlattr *a;
+	int rem;
+
+	/* User-visible flag values are copied verbatim into range.flags. */
+	BUILD_BUG_ON(OVS_NAT_FLAG_PROTO_RAND != NF_NAT_RANGE_PROTO_RANDOM);
+	BUILD_BUG_ON(OVS_NAT_FLAG_PERSISTENT != NF_NAT_RANGE_PERSISTENT);
+	BUILD_BUG_ON(OVS_NAT_FLAG_PROTO_FULL_RAND != NF_NAT_RANGE_PROTO_RANDOM_FULLY);
+
+	memset(&nat_info, 0, sizeof(nat_info));
+
+	nla_for_each_nested(a, attr, rem) {
+		/* Expected payload length per attribute; -1 marks a
+		 * variable-length attribute that is checked in its case.
+		 */
+		static const u32 ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1] = {
+			[OVS_NAT_ATTR_TYPE] = sizeof(u32),
+			[OVS_NAT_ATTR_IP_MIN] = -1,
+			[OVS_NAT_ATTR_IP_MAX] = -1,
+			[OVS_NAT_ATTR_PROTO_MIN] = sizeof(u16),
+			[OVS_NAT_ATTR_PROTO_MAX] = sizeof(u16),
+			[OVS_NAT_ATTR_FLAGS] = sizeof(u32),
+		};
+		int type = nla_type(a);
+
+		if (type > OVS_NAT_ATTR_MAX) {
+			OVS_NLERR("Unknown nat attribute (type=%d, max=%d).\n",
+				  type, OVS_NAT_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (ovs_nat_attr_lens[type] != nla_len(a) &&
+		    ovs_nat_attr_lens[type] != -1) {
+			OVS_NLERR("NAT attribute type has unexpected "
+				  " length (type=%d, length=%d, expected=%d).\n",
+				  type, nla_len(a), ovs_nat_attr_lens[type]);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NAT_ATTR_TYPE:
+			nat_info.type = nla_get_u32(a);
+			if (nat_info.type > OVS_NAT_TYPE_MAX) {
+				OVS_NLERR("NAT type %d out of range 0..%d\n",
+					  nat_info.type, OVS_NAT_TYPE_MAX);
+				return -ERANGE;
+			}
+			break;
+
+		case OVS_NAT_ATTR_IP_MIN:
+			if (nla_len(a) != sizeof(struct in_addr) &&
+			    nla_len(a) != sizeof(struct in6_addr)) {
+				return -ERANGE;
+			}
+
+			nla_memcpy(&nat_info.range.min_addr, a,
+				   sizeof(nat_info.range.min_addr));
+			nat_info.range.flags |= NF_NAT_RANGE_MAP_IPS;
+			break;
+
+		case OVS_NAT_ATTR_IP_MAX:
+			if (nla_len(a) != sizeof(struct in_addr) &&
+			    nla_len(a) != sizeof(struct in6_addr)) {
+				return -ERANGE;
+			}
+
+			/* Upper bound goes into max_addr, not min_addr. */
+			nla_memcpy(&nat_info.range.max_addr, a,
+				   sizeof(nat_info.range.max_addr));
+			nat_info.range.flags |= NF_NAT_RANGE_MAP_IPS;
+			have_ip_max = true;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_MIN:
+			nat_info.range.min_proto.all = nla_get_u16(a);
+			nat_info.range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+			break;
+
+		case OVS_NAT_ATTR_PROTO_MAX:
+			nat_info.range.max_proto.all = nla_get_u16(a);
+			nat_info.range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+			break;
+
+		case OVS_NAT_ATTR_FLAGS:
+			/* Accept only the flags userspace is allowed to set;
+			 * OR-ing with the mask would force all of them on
+			 * and let unknown bits through.
+			 */
+			nat_info.range.flags |= (nla_get_u32(a) & OVS_NAT_FLAGS);
+			break;
+
+		default:
+			OVS_NLERR("Unknown nat attribute (%d).\n", type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR("NAT attribute has %d unknown bytes.\n", rem);
+		return -EINVAL;
+	}
+
+	/* A lone lower bound means "map to exactly this address"; without
+	 * this the upper bound would stay all-zero and the range would be
+	 * inverted.
+	 */
+	if ((nat_info.range.flags & NF_NAT_RANGE_MAP_IPS) && !have_ip_max)
+		nat_info.range.max_addr = nat_info.range.min_addr;
+
+	return add_action(sfa, OVS_ACTION_ATTR_NAT, &nat_info,
+			  sizeof(nat_info));
+}
+
static int copy_action(const struct nlattr *from,
struct sw_flow_actions **sfa)
{
@@ -1845,6 +1946,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
[OVS_ACTION_ATTR_CONNTRACK] = (u32)-1,
+ [OVS_ACTION_ATTR_NAT] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -1957,6 +2059,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_NAT:
+ err = validate_and_copy_nat(a, key, sfa);
+ if (err)
+ return err;
+ skip_copy = true;
+ break;
+
default:
return -EINVAL;
}
@@ -2079,6 +2188,30 @@ static int conntrack_action_to_attr(const struct nlattr *attr, struct sk_buff *s
return 0;
}
+/* Serialize a stored NAT action back into a nested OVS_ACTION_ATTR_NAT
+ * attribute on @skb.
+ *
+ * @attr: stored action whose payload is a struct ovs_nat_info.
+ * @skb:  netlink message being built.
+ *
+ * Address and port attributes are emitted only when the matching
+ * NF_NAT_RANGE_* flag is set, because the parser sets those flags from the
+ * mere presence of the attributes; emitting them unconditionally would
+ * change the meaning of an action that is dumped and installed again.
+ *
+ * NOTE(review): only the IPv4 member of the address union is dumped.  The
+ * parser also accepts IPv6-sized addresses, but struct ovs_nat_info does
+ * not record the address family, so those cannot be reproduced here.
+ *
+ * Returns 0 on success, -EMSGSIZE if @skb has no room left.
+ */
+static int nat_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+{
+	const struct ovs_nat_info *info = nla_data(attr);
+	struct nlattr *start;
+
+	start = nla_nest_start(skb, OVS_ACTION_ATTR_NAT);
+	if (!start)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, OVS_NAT_ATTR_TYPE, info->type))
+		return -EMSGSIZE;
+
+	if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
+		if (nla_put_u32(skb, OVS_NAT_ATTR_IP_MIN,
+				info->range.min_addr.ip) ||
+		    nla_put_u32(skb, OVS_NAT_ATTR_IP_MAX,
+				info->range.max_addr.ip))
+			return -EMSGSIZE;
+	}
+
+	if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+		if (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
+				info->range.min_proto.all) ||
+		    nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
+				info->range.max_proto.all))
+			return -EMSGSIZE;
+	}
+
+	/* Report only the user-visible flags that are actually set; OR-ing
+	 * with the mask would claim every flag is on and expose the
+	 * internal MAP_IPS/PROTO_SPECIFIED bits.
+	 */
+	if (nla_put_u32(skb, OVS_NAT_ATTR_FLAGS,
+			info->range.flags & OVS_NAT_FLAGS))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, start);
+
+	return 0;
+}
+
int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
{
const struct nlattr *a;
@@ -2106,6 +2239,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
return err;
break;
+ case OVS_ACTION_ATTR_NAT:
+ err = nat_action_to_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
default:
if (nla_put(skb, type, nla_len(a), nla_data(a)))
return -EMSGSIZE;
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index f3654de..b65efc6 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -592,6 +592,57 @@ enum ovs_conntrack_attr {
#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
/**
+ * enum ovs_nat_type - Supported NAT modes
+ * @OVS_NAT_TYPE_SRC: Source NAT.
+ * @OVS_NAT_TYPE_DST: Destination NAT.
+ *
+ * NOTE(review): the datapath passes the received value unchanged to
+ * nf_nat_setup_info() as the manip type and compares it against
+ * NF_NAT_MANIP_SRC, so these values presumably have to match
+ * NF_NAT_MANIP_SRC / NF_NAT_MANIP_DST - confirm.
+ *
+ * %OVS_NAT_TYPE_MAX is the largest value accepted for %OVS_NAT_ATTR_TYPE.
+ */
+enum ovs_nat_type {
+ OVS_NAT_TYPE_SRC,
+ OVS_NAT_TYPE_DST,
+ __OVS_NAT_TYPE_MAX,
+};
+
+#define OVS_NAT_TYPE_MAX (__OVS_NAT_TYPE_MAX - 1)
+
+/**
+ * enum ovs_nat_flag - Supported NAT flags
+ * @OVS_NAT_FLAG_PROTO_RAND: Pseudo random hash based L4 port mapping (MD5)
+ * @OVS_NAT_FLAG_PERSISTENT: Persistent IP mapping across reboots
+ * @OVS_NAT_FLAG_PROTO_FULL_RAND: Fully randomized L4 port mapping
+ *
+ * NOTE: The flags values must be compatible with NF_NAT_RANGE_* in
+ * <linux/netfilter/nf_nat.h>. validate_and_copy_nat() checks this at build
+ * time with BUILD_BUG_ON().
+ *
+ * %OVS_NAT_FLAGS is the bitwise OR of every flag defined in this enum.
+ */
+enum ovs_nat_flag {
+ OVS_NAT_FLAG_PROTO_RAND = 0x4,
+ OVS_NAT_FLAG_PERSISTENT = 0x8,
+ OVS_NAT_FLAG_PROTO_FULL_RAND = 0x10,
+};
+
+#define OVS_NAT_FLAGS (OVS_NAT_FLAG_PROTO_RAND | OVS_NAT_FLAG_PERSISTENT | \
+ OVS_NAT_FLAG_PROTO_FULL_RAND)
+
+/**
+ * enum ovs_nat_attr - Attributes for %OVS_ACTION_ATTR_NAT action.
+ * @OVS_NAT_ATTR_TYPE: u32 NAT type (enum ovs_nat_type)
+ * @OVS_NAT_ATTR_IP_MIN: struct in_addr or struct in6_addr
+ * @OVS_NAT_ATTR_IP_MAX: struct in_addr or struct in6_addr
+ * @OVS_NAT_ATTR_PROTO_MIN: u16 L4 protocol specific lower boundary (port)
+ * @OVS_NAT_ATTR_PROTO_MAX: u16 L4 protocol specific upper boundary (port)
+ * @OVS_NAT_ATTR_FLAGS: u32 NAT flags (OVS_NAT_FLAG_*)
+ *
+ * The presence of either IP attribute makes the datapath set
+ * NF_NAT_RANGE_MAP_IPS on the range; the presence of either PROTO attribute
+ * sets NF_NAT_RANGE_PROTO_SPECIFIED. Attribute types above
+ * %OVS_NAT_ATTR_MAX are rejected with -EINVAL.
+ */
+enum ovs_nat_attr {
+ OVS_NAT_ATTR_UNSPEC,
+ OVS_NAT_ATTR_TYPE,
+ OVS_NAT_ATTR_IP_MIN,
+ OVS_NAT_ATTR_IP_MAX,
+ OVS_NAT_ATTR_PROTO_MIN,
+ OVS_NAT_ATTR_PROTO_MAX,
+ OVS_NAT_ATTR_FLAGS,
+ __OVS_NAT_ATTR_MAX,
+};
+
+#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
+
+/**
* enum ovs_action_attr - Action types.
*
* @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
@@ -623,6 +674,8 @@ enum ovs_conntrack_attr {
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
* is no MPLS label stack, as determined by ethertype, no action is taken.
* @OVS_ACTION_ATTR_CONNTRACK: Track the connection.
+ * @OVS_ACTION_ATTR_NAT: Perform L3 network address translation (NAT) on
+ * the packet using the Netfilter subsystem.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -646,6 +699,7 @@ enum ovs_action_attr {
* The data must be zero for the unmasked
* bits. */
OVS_ACTION_ATTR_CONNTRACK, /* One nested OVS_CT_ATTR_* */
+ OVS_ACTION_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */
__OVS_ACTION_ATTR_MAX
};
--
1.9.3
More information about the dev
mailing list