[ovs-dev] [PATCHv3 2/2] datapath: add ipv6 'set' action

Ansis Atteka aatteka at nicira.com
Sat Nov 10 12:34:34 UTC 2012


This patch adds IPv6 'set' action functionality.  It allows changing the
traffic class, flow label, hop limit, and the IPv6 source and destination
address fields.

Signed-off-by: Ansis Atteka <aatteka at nicira.com>
---
 NEWS                |    1 +
 datapath/actions.c  |   94 +++++++++++++++++++++++++++++++
 datapath/checksum.h |   24 ++++++++
 datapath/datapath.c |   20 +++++++
 debian/changelog    |    1 +
 lib/csum.c          |   15 +++++
 lib/csum.h          |    2 +
 lib/dpif-netdev.c   |    9 ++-
 lib/packets.c       |  155 +++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/packets.h       |    3 +
 10 files changed, 323 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 646ce30..0372965 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ v1.9.0 - xx xxx xxxx
     - The tunneling code no longer assumes input and output keys are symmetric.
       If they are not, PMTUD needs to be disabled for tunneling to work. Note
       this only applies to flow-based keys.
+    - Datapath: Support for ipv6 set action.
     - FreeBSD is now a supported platform, thanks to code contributions from
       Gaetano Catalli, Ed Maste, and Giuseppe Lettieri.
     - ovs-bugtool: New --ovs option to report only OVS related information.
diff --git a/datapath/actions.c b/datapath/actions.c
index 8ec692d..76c9823 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -28,6 +28,7 @@
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
 #include <net/ip.h>
+#include <net/ipv6.h>
 #include <net/checksum.h>
 #include <net/dsfield.h>
 
@@ -166,6 +167,54 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
 	*addr = new_addr;
 }
 
+static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
+				 __be32 addr[4], const __be32 new_addr[4])
+{
+	int transport_len = skb->len - skb_transport_offset(skb);
+
+	if (l4_proto == IPPROTO_TCP) {
+		if (likely(transport_len >= sizeof(struct tcphdr)))
+			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
+						  addr, new_addr, 1);
+	} else if (l4_proto == IPPROTO_UDP) {
+		if (likely(transport_len >= sizeof(struct udphdr))) {
+			struct udphdr *uh = udp_hdr(skb);
+
+			if (uh->check ||
+			    get_ip_summed(skb) == OVS_CSUM_PARTIAL) {
+				inet_proto_csum_replace16(&uh->check, skb,
+							  addr, new_addr, 1);
+				if (!uh->check)
+					uh->check = CSUM_MANGLED_0;
+			}
+		}
+	}
+}
+
+static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
+			  __be32 addr[4], const __be32 new_addr[4],
+			  bool recalculate_csum)
+{
+	if (recalculate_csum)
+		update_ipv6_checksum(skb, l4_proto, addr, new_addr);
+
+	skb_clear_rxhash(skb);
+	memcpy(addr, new_addr, sizeof(__be32[4]));
+}
+
+static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+{
+	nh->flow_lbl[0] = ((tc << 4) & 0xF0) | (nh->flow_lbl[0] & 0x0F);
+	nh->priority = tc >> 4;	/* upper four bits of the traffic class */
+}
+
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+{
+	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | ((fl >> 16) & 0x0F);
+	nh->flow_lbl[1] = (fl >> 8) & 0xFF;	/* label bits 15..8 */
+	nh->flow_lbl[2] = fl & 0xFF;		/* label bits 7..0 */
+}
+
 static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
 {
 	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
@@ -199,6 +248,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
 	return 0;
 }
 
+static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+{
+	struct ipv6hdr *nh;
+	int err;
+	__be32 *saddr;
+	__be32 *daddr;
+
+	err = make_writable(skb, skb_network_offset(skb) +
+			    sizeof(struct ipv6hdr));
+	if (unlikely(err))
+		return err;
+
+	nh = ipv6_hdr(skb);
+	saddr = (__be32 *)&nh->saddr;
+	daddr = (__be32 *)&nh->daddr;
+
+	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
+		set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
+			      ipv6_key->ipv6_src, true);
+
+	if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+		unsigned int offset = 0;
+		int flags = OVS_IP6T_FH_F_SKIP_RH;
+		bool recalc_csum = true;
+
+		if (ipv6_ext_hdr(nh->nexthdr))
+			recalc_csum = ipv6_find_hdr(skb, &offset,
+						    NEXTHDR_ROUTING, NULL,
+						    &flags) != NEXTHDR_ROUTING;
+
+		set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
+			      ipv6_key->ipv6_dst, recalc_csum);
+	}
+
+	set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
+	set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
+	nh->hop_limit = ipv6_key->ipv6_hlimit;
+
+	return 0;
+}
+
 /* Must follow make_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
 			 __be16 new_port, __sum16 *check)
@@ -373,6 +463,10 @@ static int execute_set_action(struct sk_buff *skb,
 		err = set_ipv4(skb, nla_data(nested_attr));
 		break;
 
+	case OVS_KEY_ATTR_IPV6:
+		err = set_ipv6(skb, nla_data(nested_attr));
+		break;
+
 	case OVS_KEY_ATTR_TCP:
 		err = set_tcp(skb, nla_data(nested_attr));
 		break;
diff --git a/datapath/checksum.h b/datapath/checksum.h
index 2f2ffee..a440c59 100644
--- a/datapath/checksum.h
+++ b/datapath/checksum.h
@@ -102,6 +102,30 @@ static inline void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 }
 #endif
 
+#if defined(NEED_CSUM_NORMALIZE) || LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
+#define inet_proto_csum_replace16 rpl_inet_proto_csum_replace16
+static inline void inet_proto_csum_replace16(__sum16 *sum,
+					     struct sk_buff *skb,
+					     const __be32 *from,
+					     const __be32 *to,
+					     int pseudohdr)
+{
+	__be32 diff[] = {
+		~from[0], ~from[1], ~from[2], ~from[3],
+		to[0], to[1], to[2], to[3],
+	};
+	if (get_ip_summed(skb) != OVS_CSUM_PARTIAL) {
+		*sum = csum_fold(csum_partial(diff, sizeof(diff),
+				 ~csum_unfold(*sum)));
+		if (get_ip_summed(skb) == OVS_CSUM_COMPLETE && pseudohdr)
+			skb->csum = ~csum_partial(diff, sizeof(diff),
+						  ~skb->csum);
+	} else if (pseudohdr)
+		*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
+				  csum_unfold(*sum)));
+}
+#endif
+
 #ifdef NEED_CSUM_NORMALIZE
 static inline void update_csum_start(struct sk_buff *skb, int delta)
 {
diff --git a/datapath/datapath.c b/datapath/datapath.c
index e88b1da..e359ac0 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -588,6 +588,7 @@ static int validate_set(const struct nlattr *a,
 	switch (key_type) {
 	const struct ovs_key_ipv4 *ipv4_key;
 	const struct ovs_key_ipv4_tunnel *tun_key;
+	const struct ovs_key_ipv6 *ipv6_key;
 
 	case OVS_KEY_ATTR_PRIORITY:
 	case OVS_KEY_ATTR_TUN_ID:
@@ -616,6 +617,25 @@ static int validate_set(const struct nlattr *a,
 
 		break;
 
+	case OVS_KEY_ATTR_IPV6:
+		if (flow_key->eth.type != htons(ETH_P_IPV6))
+			return -EINVAL;
+
+		if (!flow_key->ip.proto)
+			return -EINVAL;
+
+		ipv6_key = nla_data(ovs_key);
+		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+			return -EINVAL;
+
+		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+			return -EINVAL;
+
+		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
+			return -EINVAL;
+
+		break;
+
 	case OVS_KEY_ATTR_TCP:
 		if (flow_key->ip.proto != IPPROTO_TCP)
 			return -EINVAL;
diff --git a/debian/changelog b/debian/changelog
index ac5f158..b9518fb 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -11,6 +11,7 @@ openvswitch (1.9.0-1) unstable; urgency=low
     - The tunneling code no longer assumes input and output keys are symmetric.
       If they are not, PMTUD needs to be disabled for tunneling to work. Note
       this only applies to flow-based keys.
+    - Datapath: Support for ipv6 set action.
     - FreeBSD is now a supported platform, thanks to code contributions from
       Gaetano Catalli, Ed Maste, and Giuseppe Lettieri.
     - ovs-bugtool: New --ovs option to report only OVS related information.
diff --git a/lib/csum.c b/lib/csum.c
index 98a83de..fb32a53 100644
--- a/lib/csum.c
+++ b/lib/csum.c
@@ -112,6 +112,21 @@ recalc_csum32(ovs_be16 old_csum, ovs_be32 old_u32, ovs_be32 new_u32)
                          old_u32 >> 16, new_u32 >> 16);
 }
 
+/* Returns the new checksum for a packet in which the checksum field previously
+ * contained 'old_csum' and in which a field that contained 'old_u32[4]' was
+ * changed to contain 'new_u32[4]'. */
+ovs_be16
+recalc_csum128(ovs_be16 old_csum, ovs_be32 old_u32[4],
+               const ovs_be32 new_u32[4])
+{
+    ovs_be16 new_csum = old_csum;
+    int i;
+
+    for (i = 0; i < 4; ++i) {
+        new_csum = recalc_csum32(new_csum, old_u32[i], new_u32[i]);
+    }
+    return new_csum;
+}
 #else  /* __CHECKER__ */
 /* Making sparse happy with these functions also makes them unreadable, so
  * don't bother to show it their implementations. */
diff --git a/lib/csum.h b/lib/csum.h
index 12402d7..6382d29 100644
--- a/lib/csum.h
+++ b/lib/csum.h
@@ -28,5 +28,7 @@ uint32_t csum_continue(uint32_t partial, const void *, size_t);
 ovs_be16 csum_finish(uint32_t partial);
 ovs_be16 recalc_csum16(ovs_be16 old_csum, ovs_be16 old_u16, ovs_be16 new_u16);
 ovs_be16 recalc_csum32(ovs_be16 old_csum, ovs_be32 old_u32, ovs_be32 new_u32);
+ovs_be16 recalc_csum128(ovs_be16 old_csum, ovs_be32 old_u32[4],
+                        const ovs_be32 new_u32[4]);
 
 #endif /* csum.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a80b1b0..4ce4147 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1181,13 +1181,13 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
 {
     enum ovs_key_attr type = nl_attr_type(a);
     const struct ovs_key_ipv4 *ipv4_key;
+    const struct ovs_key_ipv6 *ipv6_key;
     const struct ovs_key_tcp *tcp_key;
     const struct ovs_key_udp *udp_key;
 
     switch (type) {
     case OVS_KEY_ATTR_TUN_ID:
     case OVS_KEY_ATTR_PRIORITY:
-    case OVS_KEY_ATTR_IPV6:
     case OVS_KEY_ATTR_IPV4_TUNNEL:
         /* not implemented */
         break;
@@ -1203,6 +1203,13 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
                         ipv4_key->ipv4_tos, ipv4_key->ipv4_ttl);
         break;
 
+    case OVS_KEY_ATTR_IPV6:
+        ipv6_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv6));
+        packet_set_ipv6(packet, ipv6_key->ipv6_proto, ipv6_key->ipv6_src,
+                        ipv6_key->ipv6_dst, ipv6_key->ipv6_tclass,
+                        ipv6_key->ipv6_label, ipv6_key->ipv6_hlimit);
+        break;
+
     case OVS_KEY_ATTR_TCP:
         tcp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_tcp));
         packet_set_tcp_port(packet, tcp_key->tcp_src, tcp_key->tcp_dst);
diff --git a/lib/packets.c b/lib/packets.c
index 16f4fe6..fa73b50 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -20,6 +20,7 @@
 #include <arpa/inet.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
+#include <netinet/ip6.h>
 #include <stdlib.h>
 #include "byte-order.h"
 #include "csum.h"
@@ -472,6 +473,133 @@ packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr)
     *addr = new_addr;
 }
 
+/* Returns true if 'packet' contains at least one routing header whose
+ * segments-left field (ip6r_segleft) is greater than 0.
+ *
+ * This function assumes that L3 and L4 markers are set in the packet. */
+static bool
+packet_rh_present(struct ofpbuf *packet)
+{
+    const struct ip6_hdr *nh;
+    int nexthdr;
+    size_t len;
+    size_t remaining;
+    uint8_t *data = packet->l3;
+
+    remaining = (uint8_t *)packet->l4 - (uint8_t *)packet->l3;
+
+    if (remaining < sizeof *nh) {
+        return false;
+    }
+    nh = (struct ip6_hdr *)data;
+    data += sizeof *nh;
+    remaining -= sizeof *nh;
+    nexthdr = nh->ip6_nxt;
+
+    while (1) {
+        if ((nexthdr != IPPROTO_HOPOPTS)
+                && (nexthdr != IPPROTO_ROUTING)
+                && (nexthdr != IPPROTO_DSTOPTS)
+                && (nexthdr != IPPROTO_AH)
+                && (nexthdr != IPPROTO_FRAGMENT)) {
+            /* It's either a terminal header (e.g., TCP, UDP) or one we
+             * don't understand.  In either case there is nothing more to
+             * walk, so no routing header with segments left was found. */
+            break;
+        }
+
+        /* We only verify that at least 8 bytes of the next header are
+         * available, but many of these headers are longer.  Ensure that
+         * accesses within the extension header are within those first 8
+         * bytes. All extension headers are required to be at least 8
+         * bytes. */
+        if (remaining < 8) {
+            return false;
+        }
+
+        if (nexthdr == IPPROTO_AH) {
+            /* A standard AH definition isn't available, but the fields
+             * we care about are in the same location as the generic
+             * option header--only the header length is calculated
+             * differently. */
+            const struct ip6_ext *ext_hdr = (struct ip6_ext *)data;
+
+            nexthdr = ext_hdr->ip6e_nxt;
+            len = (ext_hdr->ip6e_len + 2) * 4;
+        } else if (nexthdr == IPPROTO_FRAGMENT) {
+            const struct ip6_frag *frag_hdr = (struct ip6_frag *)data;
+
+            nexthdr = frag_hdr->ip6f_nxt;
+            len = sizeof *frag_hdr;
+        } else if (nexthdr == IPPROTO_ROUTING) {
+            const struct ip6_rthdr *rh = (struct ip6_rthdr *)data;
+
+            if (rh->ip6r_segleft > 0) {
+                return true;
+            }
+
+            nexthdr = rh->ip6r_nxt;
+            len = (rh->ip6r_len + 1) * 8;
+        } else {
+            const struct ip6_ext *ext_hdr = (struct ip6_ext *)data;
+
+            nexthdr = ext_hdr->ip6e_nxt;
+            len = (ext_hdr->ip6e_len + 1) * 8;
+        }
+
+        if (remaining < len) {
+            return false;
+        }
+        remaining -= len;
+        data += len;
+    }
+
+    return false;
+}
+
+static void
+packet_update_csum128(struct ofpbuf *packet, uint8_t proto,
+                     ovs_be32 addr[4], const ovs_be32 new_addr[4])
+{
+    if (proto == IPPROTO_TCP && packet->l7) {
+        struct tcp_header *th = packet->l4;
+
+        th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+    } else if (proto == IPPROTO_UDP && packet->l7) {
+        struct udp_header *uh = packet->l4;
+
+        if (uh->udp_csum) {
+            uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
+            if (!uh->udp_csum) {
+                uh->udp_csum = htons(0xffff);
+            }
+        }
+    }
+}
+
+static void
+packet_set_ipv6_addr(struct ofpbuf *packet, uint8_t proto,
+                     struct in6_addr *addr, const ovs_be32 new_addr[4],
+                     bool recalculate_csum)
+{
+    if (recalculate_csum) {
+        packet_update_csum128(packet, proto, (ovs_be32 *)addr, new_addr);
+    }
+    memcpy(addr, new_addr, sizeof(*addr));
+}
+
+static void
+packet_set_ipv6_flow_label(ovs_be32 *flow_label, ovs_be32 flow_key)
+{
+    *flow_label = (*flow_label & htonl(~IPV6_LABEL_MASK)) | flow_key;
+}
+
+static void
+packet_set_ipv6_tc(ovs_be32 *flow_label, uint8_t tc)
+{
+    *flow_label = (*flow_label & htonl(0xF00FFFFF)) | htonl(tc << 20);
+}
+
 /* Modifies the IPv4 header fields of 'packet' to be consistent with 'src',
  * 'dst', 'tos', and 'ttl'.  Updates 'packet''s L4 checksums as appropriate.
  * 'packet' must contain a valid IPv4 packet with correctly populated l[347]
@@ -507,6 +635,33 @@ packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst,
     }
 }
 
+/* Modifies the IPv6 header of 'packet' to match 'src', 'dst', traffic class
+ * 'key_tc', flow label 'key_fl' and hop limit 'key_hl'.  Updates the TCP/UDP
+ * checksum selected by 'proto' as appropriate.  'packet' must contain a valid
+ * IPv6 packet with correctly populated l[347] markers. */
+void
+packet_set_ipv6(struct ofpbuf *packet, uint8_t proto, const ovs_be32 src[4],
+                const ovs_be32 dst[4], uint8_t key_tc, ovs_be32 key_fl,
+                uint8_t key_hl)
+{
+    struct ip6_hdr *nh = packet->l3;
+
+    if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) {
+        packet_set_ipv6_addr(packet, proto, &nh->ip6_src, src, true);
+    }
+
+    if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) {
+        packet_set_ipv6_addr(packet, proto, &nh->ip6_dst, dst,
+                             !packet_rh_present(packet));
+    }
+
+    packet_set_ipv6_tc(&nh->ip6_flow, key_tc);
+
+    packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl);
+
+    nh->ip6_hlim = key_hl;
+}
+
 static void
 packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
 {
diff --git a/lib/packets.h b/lib/packets.h
index e550be0..4ad527b 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -490,6 +490,9 @@ void *snap_compose(struct ofpbuf *, const uint8_t eth_dst[ETH_ADDR_LEN],
                    unsigned int oui, uint16_t snap_type, size_t size);
 void packet_set_ipv4(struct ofpbuf *, ovs_be32 src, ovs_be32 dst, uint8_t tos,
                      uint8_t ttl);
+void packet_set_ipv6(struct ofpbuf *, uint8_t proto, const ovs_be32 src[4],
+                     const ovs_be32 dst[4], uint8_t tc,
+                     ovs_be32 fl, uint8_t hlimit);
 void packet_set_tcp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst);
 void packet_set_udp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst);
 
-- 
1.7.9.5




More information about the dev mailing list