[ovs-dev] [PATCH v3 3/3] datapath: add layer 3 flow/port support
Lorand Jakab
lojakab at cisco.com
Tue May 13 14:02:16 UTC 2014
Implement the pop_eth and push_eth actions in the kernel datapath, and add
layer 3 flow support.
Signed-off-by: Lorand Jakab <lojakab at cisco.com>
---
datapath/actions.c | 35 +++++++++++++++++++++
datapath/datapath.h | 1 +
datapath/flow.c | 43 ++++++++++++++------------
datapath/flow.h | 1 +
datapath/flow_netlink.c | 72 ++++++++++++++++++++++++++++++++++++-------
datapath/vport-gre.c | 5 ++-
datapath/vport-internal_dev.c | 5 ++-
datapath/vport-lisp.c | 26 ++++------------
datapath/vport-netdev.c | 5 ++-
datapath/vport-vxlan.c | 7 ++++-
datapath/vport.c | 5 ++-
datapath/vport.h | 2 +-
12 files changed, 151 insertions(+), 56 deletions(-)
diff --git a/datapath/actions.c b/datapath/actions.c
index 7fe2f54..cd0c91d 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -143,6 +143,33 @@ static int set_eth_addr(struct sk_buff *skb,
return 0;
}
+static int pop_eth(struct sk_buff *skb)
+{
+ skb_pull_rcsum(skb, skb_network_offset(skb));
+ skb_reset_mac_header(skb);
+ vlan_set_tci(skb, 0);
+
+ OVS_CB(skb)->is_layer3 = true;
+
+ return 0;
+}
+
+static void push_eth(struct sk_buff *skb, const struct ovs_action_push_eth *ethh)
+{
+ skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+
+ ether_addr_copy(eth_hdr(skb)->h_source, ethh->addresses.eth_src);
+ ether_addr_copy(eth_hdr(skb)->h_dest, ethh->addresses.eth_dst);
+
+ eth_hdr(skb)->h_proto = ethh->eth_type;
+ skb->protocol = ethh->eth_type;
+
+ ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+
+ OVS_CB(skb)->is_layer3 = false;
+}
+
static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
__be32 *addr, __be32 new_addr)
{
@@ -585,6 +612,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_vlan(skb);
break;
+ case OVS_ACTION_ATTR_PUSH_ETH:
+ push_eth(skb, nla_data(a));
+ break;
+
+ case OVS_ACTION_ATTR_POP_ETH:
+ err = pop_eth(skb);
+ break;
+
case OVS_ACTION_ATTR_RECIRC: {
struct sk_buff *recirc_skb;
const bool last_action = (a->nla_len == rem);
diff --git a/datapath/datapath.h b/datapath/datapath.h
index a847bd9..b8dd33c 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -107,6 +107,7 @@ struct ovs_skb_cb {
struct sw_flow_key *pkt_key;
struct ovs_key_ipv4_tunnel *tun_key;
struct vport *input_vport;
+ bool is_layer3;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
diff --git a/datapath/flow.c b/datapath/flow.c
index c52081b..a31c529 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -458,26 +458,31 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
skb_reset_mac_header(skb);
- /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
- * header in the linear data area.
- */
- eth = eth_hdr(skb);
- ether_addr_copy(key->eth.src, eth->h_source);
- ether_addr_copy(key->eth.dst, eth->h_dest);
-
- __skb_pull(skb, 2 * ETH_ALEN);
- /* We are going to push all headers that we pull, so no need to
- * update skb->csum here. */
-
- if (vlan_tx_tag_present(skb))
- key->eth.tci = htons(vlan_get_tci(skb));
- else if (eth->h_proto == htons(ETH_P_8021Q))
- if (unlikely(parse_vlan(skb, key)))
+ /* Link layer. */
+ if (OVS_CB(skb)->is_layer3) {
+ /* The receiving L3 vport is expected to have stored the inner
+ * packet's protocol in skb->protocol; use it here as eth.type. */
+ key->phy.noeth = true;
+ key->eth.type = skb->protocol;
+ } else {
+ eth = eth_hdr(skb);
+ ether_addr_copy(key->eth.src, eth->h_source);
+ ether_addr_copy(key->eth.dst, eth->h_dest);
+
+ __skb_pull(skb, 2 * ETH_ALEN);
+ /* We are going to push all headers that we pull, so no need to
+ * update skb->csum here. */
+
+ if (vlan_tx_tag_present(skb))
+ key->eth.tci = htons(vlan_get_tci(skb));
+ else if (eth->h_proto == htons(ETH_P_8021Q))
+ if (unlikely(parse_vlan(skb, key)))
+ return -ENOMEM;
+
+ key->eth.type = parse_ethertype(skb);
+ if (unlikely(key->eth.type == htons(0)))
return -ENOMEM;
-
- key->eth.type = parse_ethertype(skb);
- if (unlikely(key->eth.type == htons(0)))
- return -ENOMEM;
+ }
skb_reset_network_header(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
diff --git a/datapath/flow.h b/datapath/flow.h
index 2018691..7c0407a 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -73,6 +73,7 @@ struct sw_flow_key {
u32 priority; /* Packet QoS priority. */
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
+ bool noeth; /* Packet has no Ethernet header */
} __packed phy; /* Safe when right after 'tun_key'. */
u32 ovs_flow_hash; /* Datapath computed hash value. */
u32 recirc_id; /* Recirculation ID. */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 803a94c..849f415 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -109,14 +109,12 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
static bool match_validate(const struct sw_flow_match *match,
u64 key_attrs, u64 mask_attrs)
{
- u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
+ u64 key_expected = 0;
u64 mask_allowed = key_attrs; /* At most allow all key attributes */
/* The following mask attributes allowed only if they
* pass the validation tests. */
- mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
- | (1ULL << OVS_KEY_ATTR_IPV6)
- | (1ULL << OVS_KEY_ATTR_TCP)
+ mask_allowed &= ~((1ULL << OVS_KEY_ATTR_TCP)
| (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
| (1ULL << OVS_KEY_ATTR_UDP)
| (1ULL << OVS_KEY_ATTR_SCTP)
@@ -128,7 +126,10 @@ static bool match_validate(const struct sw_flow_match *match,
/* Always allowed mask fields. */
mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
| (1ULL << OVS_KEY_ATTR_IN_PORT)
- | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
+ | (1ULL << OVS_KEY_ATTR_ETHERNET)
+ | (1ULL << OVS_KEY_ATTR_ETHERTYPE)
+ | (1ULL << OVS_KEY_ATTR_IPV4)
+ | (1ULL << OVS_KEY_ATTR_IPV6));
/* Check key attributes. */
if (match->key->eth.type == htons(ETH_P_ARP)
@@ -524,8 +525,10 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
eth_key->eth_src, ETH_ALEN, is_mask);
SW_FLOW_KEY_MEMCPY(match, eth.dst,
eth_key->eth_dst, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_PUT(match, phy.noeth, false, is_mask);
attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
- }
+ } else if (!is_mask)
+ SW_FLOW_KEY_PUT(match, phy.noeth, true, is_mask);
if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
__be16 tci;
@@ -567,6 +570,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
const struct ovs_key_ipv4 *ipv4_key;
+ /* Add eth.type value for layer 3 flows */
+ if (!(attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
+ __be16 eth_type;
+
+ if (is_mask) {
+ eth_type = htons(0xffff);
+ } else {
+ eth_type = htons(ETH_P_IP);
+ }
+ SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+ }
+
ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
@@ -591,6 +606,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
const struct ovs_key_ipv6 *ipv6_key;
+ /* Add eth.type value for layer 3 flows */
+ if (!(attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
+ __be16 eth_type;
+
+ if (is_mask) {
+ eth_type = htons(0xffff);
+ } else {
+ eth_type = htons(ETH_P_IPV6);
+ }
+ SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+ }
+
ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
@@ -897,7 +924,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
- struct nlattr *nla, *encap;
+ struct nlattr *nla, *encap = NULL;
bool is_mask = (swkey != output);
if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
@@ -929,6 +956,9 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure;
+ if (swkey->phy.noeth)
+ goto noethernet;
+
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
@@ -946,8 +976,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
if (!swkey->eth.tci)
goto unencap;
- } else
- encap = NULL;
+ }
if (swkey->eth.type == htons(ETH_P_802_2)) {
/*
@@ -966,6 +995,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
goto nla_put_failure;
+noethernet:
if (swkey->eth.type == htons(ETH_P_IP)) {
struct ovs_key_ipv4 *ipv4_key;
@@ -1301,7 +1331,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
- bool *set_tun)
+ bool *set_tun,
+ bool noeth)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
@@ -1322,7 +1353,11 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
+ break;
+
case OVS_KEY_ATTR_ETHERNET:
+ if (noeth)
+ return -EINVAL;
break;
case OVS_KEY_ATTR_TUNNEL:
@@ -1434,6 +1469,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
{
const struct nlattr *a;
int rem, err;
+ bool noeth = key->phy.noeth;
if (depth >= SAMPLE_ACTION_DEPTH)
return -EOVERFLOW;
@@ -1444,6 +1480,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+ [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
+ [OVS_ACTION_ATTR_POP_ETH] = 0,
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -1488,10 +1526,22 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
break;
}
+ case OVS_ACTION_ATTR_POP_ETH:
+ if (noeth)
+ return -EINVAL;
+ noeth = true;
+ break;
+
+ case OVS_ACTION_ATTR_PUSH_ETH:
+ noeth = false;
+ break;
+
case OVS_ACTION_ATTR_POP_VLAN:
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
+ if (noeth)
+ return -EINVAL;
vlan = nla_data(a);
if (vlan->vlan_tpid != htons(ETH_P_8021Q))
return -EINVAL;
@@ -1503,7 +1553,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
break;
case OVS_ACTION_ATTR_SET:
- err = validate_set(a, key, sfa, &skip_copy);
+ err = validate_set(a, key, sfa, &skip_copy, noeth);
if (err)
return err;
break;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 5d5090c..5115b3a 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -112,7 +112,7 @@ static int gre_rcv(struct sk_buff *skb,
key = key_to_tunnel_id(tpi->key, tpi->seq);
ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
- ovs_vport_receive(vport, skb, &tun_key);
+ ovs_vport_receive(vport, skb, &tun_key, false);
return PACKET_RCVD;
}
@@ -289,6 +289,9 @@ static int gre_send(struct vport *vport, struct sk_buff *skb)
if (unlikely(!OVS_CB(skb)->tun_key))
return -EINVAL;
+ if (unlikely(OVS_CB(skb)->is_layer3))
+ return -EINVAL;
+
hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
return __send(vport, skb, hlen, 0, 0);
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index 637d712..afb9a67 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -76,7 +76,7 @@ static struct net_device_stats *internal_dev_sys_stats(struct net_device *netdev
static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
rcu_read_lock();
- ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
+ ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL, false);
rcu_read_unlock();
return 0;
}
@@ -236,6 +236,9 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
struct net_device *netdev = netdev_vport_priv(vport)->dev;
int len;
+ if (unlikely(OVS_CB(skb)->is_layer3))
+ return -EINVAL;
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
if (vlan_tx_tag_present(skb)) {
if (unlikely(!__vlan_put_tag(skb,
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index a1e2b2b..46256dd 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -219,8 +219,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
struct iphdr *iph, *inner_iph;
struct ovs_key_ipv4_tunnel tun_key;
__be64 key;
- struct ethhdr *ethh;
- __be16 protocol;
lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest);
if (unlikely(!lisp_port))
@@ -244,26 +242,16 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
inner_iph = (struct iphdr *)(lisph + 1);
switch (inner_iph->version) {
case 4:
- protocol = htons(ETH_P_IP);
+ skb->protocol = htons(ETH_P_IP);
break;
case 6:
- protocol = htons(ETH_P_IPV6);
+ skb->protocol = htons(ETH_P_IPV6);
break;
default:
goto error;
}
- skb->protocol = protocol;
- /* Add Ethernet header */
- ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
- memset(ethh, 0, ETH_HLEN);
- ethh->h_dest[0] = 0x02;
- ethh->h_source[0] = 0x02;
- ethh->h_proto = protocol;
-
- ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
-
- ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key);
+ ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key, true);
goto out;
error:
@@ -429,6 +417,9 @@ static int lisp_send(struct vport *vport, struct sk_buff *skb)
if (unlikely(!OVS_CB(skb)->tun_key))
return -EINVAL;
+ if (unlikely(!OVS_CB(skb)->is_layer3))
+ return -EINVAL;
+
if (skb->protocol != htons(ETH_P_IP) &&
skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
@@ -462,11 +453,6 @@ static int lisp_send(struct vport *vport, struct sk_buff *skb)
goto err_free_rt;
}
- /* Reset l2 headers. */
- skb_pull(skb, network_offset);
- skb_reset_mac_header(skb);
- vlan_set_tci(skb, 0);
-
skb_reset_inner_headers(skb);
__skb_push(skb, LISP_HLEN);
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index c15923b..1afef3f 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -209,7 +209,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
skb_push(skb, ETH_HLEN);
ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
- ovs_vport_receive(vport, skb, NULL);
+ ovs_vport_receive(vport, skb, NULL, false);
return;
error:
@@ -232,6 +232,9 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb)
int mtu = netdev_vport->dev->mtu;
int len;
+ if (unlikely(OVS_CB(skb)->is_layer3))
+ return -EINVAL;
+
if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
netdev_vport->dev->name,
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index cc9477d..9e79a9b 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -70,7 +70,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
key = cpu_to_be64(ntohl(vx_vni) >> 8);
ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
- ovs_vport_receive(vport, skb, &tun_key);
+ ovs_vport_receive(vport, skb, &tun_key, false);
}
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
@@ -155,6 +155,11 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
goto error;
}
+ if (unlikely(OVS_CB(skb)->is_layer3)) {
+ err = -EINVAL;
+ goto error;
+ }
+
/* Route lookup */
saddr = OVS_CB(skb)->tun_key->ipv4_src;
rt = find_route(ovs_dp_get_net(vport->dp),
diff --git a/datapath/vport.c b/datapath/vport.c
index 0dcecd0..a3fdeac 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -461,13 +461,15 @@ u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb)
* @vport: vport that received the packet
* @skb: skb that was received
* @tun_key: tunnel (if any) that carried packet
+ * @is_layer3: true if the packet is layer 3, i.e. has no Ethernet header
*
* Must be called with rcu_read_lock. The packet cannot be shared and
* skb->data should point to the Ethernet header. The caller must have already
* called compute_ip_summed() to initialize the checksumming fields.
*/
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
- struct ovs_key_ipv4_tunnel *tun_key)
+ struct ovs_key_ipv4_tunnel *tun_key,
+ bool is_layer3)
{
struct pcpu_sw_netstats *stats;
@@ -478,6 +480,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
u64_stats_update_end(&stats->syncp);
OVS_CB(skb)->tun_key = tun_key;
+ OVS_CB(skb)->is_layer3 = is_layer3;
ovs_dp_process_received_packet(vport, skb);
}
diff --git a/datapath/vport.h b/datapath/vport.h
index cfaea09..cd2c7ba 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -211,7 +211,7 @@ static inline struct vport *vport_from_priv(void *priv)
}
void ovs_vport_receive(struct vport *, struct sk_buff *,
- struct ovs_key_ipv4_tunnel *);
+ struct ovs_key_ipv4_tunnel *, bool);
/* List of statically compiled vport implementations. Don't forget to also
* add yours to the list at the top of vport.c. */
--
1.8.5.2 (Apple Git-48)
More information about the dev
mailing list