[ovs-dev] [#8024 6/7] Support matching and modifying IP ECN bits.

Justin Pettit jpettit at nicira.com
Mon Nov 7 18:33:09 UTC 2011


Signed-off-by: Justin Pettit <jpettit at nicira.com>
---
 NEWS                          |    2 +
 datapath/actions.c            |   12 ++------
 datapath/datapath.c           |    4 ---
 datapath/flow.c               |    9 +++---
 include/openflow/nicira-ext.h |   25 +++++++++++++++++-
 lib/classifier.c              |   15 +++++++++-
 lib/classifier.h              |    3 +-
 lib/flow.c                    |   12 +++-----
 lib/meta-flow.c               |   57 +++++++++++++++++++++++++++++++----------
 lib/meta-flow.h               |    3 +-
 lib/nx-match.c                |   16 +++++++++++
 lib/nx-match.def              |    3 +-
 lib/nx-match.h                |    3 +-
 lib/odp-util.c                |    8 +++---
 lib/ofp-parse.c               |    4 +++
 lib/ofp-print.c               |    6 ++++
 lib/ofp-util.c                |    6 ++++
 lib/ofp-util.def              |    1 +
 ofproto/ofproto-dpif.c        |    9 ++++++-
 tests/odp.at                  |   34 +++++++++++++-----------
 tests/ovs-ofctl.at            |   12 ++++++++
 utilities/ovs-ofctl.8.in      |   12 ++++++++
 22 files changed, 189 insertions(+), 67 deletions(-)

diff --git a/NEWS b/NEWS
index 51e00cf..d13c906 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,8 @@ post-v1.3.0
 ------------------------
     - OpenFlow:
        - Added ability to match on IPv6 flow label through NXM.
+       - Added ability to match on ECN bits in IPv4 and IPv6 through NXM.
+       - Added ability to modify ECN bits in IPv4 and IPv6.
     - ovs-appctl:
       - New "fdb/flush" command to flush bridge's MAC learning table.
 
diff --git a/datapath/actions.c b/datapath/actions.c
index efe5e98..d8156a0 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -19,7 +19,6 @@
 #include <linux/in6.h>
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
-#include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/checksum.h>
 
@@ -153,14 +152,9 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
 
 static void set_ip_tos(struct sk_buff *skb, struct iphdr *nh, u8 new_tos)
 {
-	u8 old, new;
-
-	/* Set the DSCP bits and preserve the ECN bits. */
-	old = nh->tos;
-	new = new_tos | (nh->tos & INET_ECN_MASK);
-	csum_replace4(&nh->check, (__force __be32)htons(old),
-				  (__force __be32)htons(new));
-	nh->tos = new;
+	csum_replace4(&nh->check, (__force __be32)htons(nh->tos),
+				  (__force __be32)htons(new_tos));
+	nh->tos = new_tos;
 }
 
 static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
diff --git a/datapath/datapath.c b/datapath/datapath.c
index c6c6f7f..b0f80bd 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -42,7 +42,6 @@
 #include <linux/openvswitch.h>
 #include <linux/rculist.h>
 #include <linux/dmi.h>
-#include <net/inet_ecn.h>
 #include <net/genetlink.h>
 
 #include "checksum.h"
@@ -583,9 +582,6 @@ static int validate_action_key(const struct nlattr *a,
 		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
 			return -EINVAL;
 
-		if (ipv4_key->ipv4_tos & INET_ECN_MASK)
-			return -EINVAL;
-
 		if (ipv4_key->ipv4_frag !=
 		    (flow_key->ip.frag & OVS_FRAG_TYPE_MASK))
 			return -EINVAL;
diff --git a/datapath/flow.c b/datapath/flow.c
index 5e528a3..f79c577 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -30,7 +30,6 @@
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
 #include <linux/rculist.h>
-#include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ndisc.h>
@@ -201,7 +200,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
 	payload_ofs = (u8 *)(nh + 1) - skb->data;
 
 	key->ip.proto = NEXTHDR_NONE;
-	key->ip.tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
+	key->ip.tos = ipv6_get_dsfield(nh);
 	key->ipv6.label = *(u32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
 	ipv6_addr_copy(&key->ipv6.addr.src, &nh->saddr);
 	ipv6_addr_copy(&key->ipv6.addr.dst, &nh->daddr);
@@ -689,7 +688,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		key->ipv4.addr.dst = nh->daddr;
 
 		key->ip.proto = nh->protocol;
-		key->ip.tos = nh->tos & ~INET_ECN_MASK;
+		key->ip.tos = nh->tos;
 
 		offset = nh->frag_off & htons(IP_OFFSET);
 		if (offset) {
@@ -1247,7 +1246,7 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		ipv4_key->ipv4_src = swkey->ipv4.addr.src;
 		ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
 		ipv4_key->ipv4_proto = swkey->ip.proto;
-		ipv4_key->ipv4_tos = swkey->ip.tos & ~INET_ECN_MASK;
+		ipv4_key->ipv4_tos = swkey->ip.tos;
 		ipv4_key->ipv4_frag = swkey->ip.frag & OVS_FRAG_TYPE_MASK;
 	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
 		struct ovs_key_ipv6 *ipv6_key;
@@ -1263,7 +1262,7 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 				sizeof(ipv6_key->ipv6_dst));
 		ipv6_key->ipv6_label = swkey->ipv6.label;
 		ipv6_key->ipv6_proto = swkey->ip.proto;
-		ipv6_key->ipv6_tos = swkey->ip.tos & ~INET_ECN_MASK;
+		ipv6_key->ipv6_tos = swkey->ip.tos;
 		ipv6_key->ipv6_frag = swkey->ip.frag & OVS_FRAG_TYPE_MASK;
 	} else if (swkey->eth.type == htons(ETH_P_ARP)) {
 		struct ovs_key_arp *arp_key;
diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index a2e383d..604a061 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -296,7 +296,8 @@ enum nx_action_subtype {
     NXAST_RESUBMIT_TABLE,       /* struct nx_action_resubmit */
     NXAST_OUTPUT_REG,           /* struct nx_action_output_reg */
     NXAST_LEARN,                /* struct nx_action_learn */
-    NXAST_EXIT                  /* struct nx_action_header */
+    NXAST_EXIT,                 /* struct nx_action_header */
+    NXAST_SET_NW_ECN            /* struct nx_action_set_nw_ecn */
 };
 
 /* Header for Nicira-defined actions. */
@@ -1060,6 +1061,19 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
  *
  * Uses the nx_action_header structure. */
 
+/* Action structure for NXAST_SET_NW_ECN.
+ *
+ * Sets the ECN bits of the IP header to the low-order 2 bits of 'nw_ecn'. */
+struct nx_action_set_nw_ecn {
+    ovs_be16 type;              /* OFPAT_VENDOR. */
+    ovs_be16 len;               /* Length is 16. */
+    ovs_be32 vendor;            /* NX_VENDOR_ID. */
+    ovs_be16 subtype;           /* NXAST_SET_NW_ECN. */
+    uint8_t nw_ecn;             /* New IP ECN. */
+    uint8_t pad[5];
+};
+OFP_ASSERT(sizeof(struct nx_action_set_nw_ecn) == 16);
+
 /* Flexible flow specifications (aka NXM = Nicira Extended Match).
  *
  * OpenFlow 1.0 has "struct ofp_match" for specifying flow matches.  This
@@ -1623,6 +1637,15 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
  * Masking: Not maskable. */
 #define NXM_NX_IPV6_LABEL  NXM_HEADER  (0x0001, 27, 4)
 
+/* The ECN of the IP header.
+ *
+ * Prereqs: NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd.
+ *
+ * Format: ECN in the low-order 2 bits.
+ *
+ * Masking: Not maskable. */
+#define NXM_NX_IP_ECN      NXM_HEADER  (0x0001, 28, 1)
+
 /* ## --------------------- ## */
 /* ## Requests and replies. ## */
 /* ## --------------------- ## */
diff --git a/lib/classifier.c b/lib/classifier.c
index 66e37f2..4d81841 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -318,11 +318,19 @@ cls_rule_set_nw_dst_masked(struct cls_rule *rule, ovs_be32 ip, ovs_be32 mask)
 }
 
 void
-cls_rule_set_nw_tos(struct cls_rule *rule, uint8_t nw_tos)
+cls_rule_set_nw_dscp(struct cls_rule *rule, uint8_t nw_dscp)
 {
     rule->wc.tos_mask |= IP_DSCP_MASK;
     rule->flow.tos &= ~IP_DSCP_MASK;
-    rule->flow.tos |= nw_tos & IP_DSCP_MASK;
+    rule->flow.tos |= nw_dscp & IP_DSCP_MASK;
+}
+
+void
+cls_rule_set_nw_ecn(struct cls_rule *rule, uint8_t nw_ecn)
+{
+    rule->wc.tos_mask |= IP_ECN_MASK;
+    rule->flow.tos &= ~IP_ECN_MASK;
+    rule->flow.tos |= nw_ecn & IP_ECN_MASK;
 }
 
 void
@@ -623,6 +631,9 @@ cls_rule_format(const struct cls_rule *rule, struct ds *s)
     if (wc->tos_mask & IP_DSCP_MASK) {
         ds_put_format(s, "nw_tos=%"PRIu8",", f->tos & IP_DSCP_MASK);
     }
+    if (wc->tos_mask & IP_ECN_MASK) {
+        ds_put_format(s, "nw_ecn=%"PRIu8",", f->tos & IP_ECN_MASK);
+    }
     switch (wc->frag_mask & FLOW_FRAG_MASK) {
     case FLOW_FRAG_ANY | FLOW_FRAG_LATER:
         ds_put_format(s, "frag=%s,",
diff --git a/lib/classifier.h b/lib/classifier.h
index a61d0e6..581ee83 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -116,7 +116,8 @@ void cls_rule_set_nw_src(struct cls_rule *, ovs_be32);
 bool cls_rule_set_nw_src_masked(struct cls_rule *, ovs_be32 ip, ovs_be32 mask);
 void cls_rule_set_nw_dst(struct cls_rule *, ovs_be32);
 bool cls_rule_set_nw_dst_masked(struct cls_rule *, ovs_be32 ip, ovs_be32 mask);
-void cls_rule_set_nw_tos(struct cls_rule *, uint8_t);
+void cls_rule_set_nw_dscp(struct cls_rule *, uint8_t);
+void cls_rule_set_nw_ecn(struct cls_rule *, uint8_t);
 void cls_rule_set_frag(struct cls_rule *, uint8_t frag);
 void cls_rule_set_frag_masked(struct cls_rule *, uint8_t frag, uint8_t mask);
 void cls_rule_set_icmp_type(struct cls_rule *, uint8_t);
diff --git a/lib/flow.c b/lib/flow.c
index f87a0f8..82b22c1 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -148,7 +148,7 @@ parse_ipv6(struct ofpbuf *packet, struct flow *flow)
     flow->ipv6_dst = nh->ip6_dst;
 
     tc_flow = get_unaligned_be32(&nh->ip6_flow);
-    flow->tos = (ntohl(tc_flow) >> 4) & IP_DSCP_MASK;
+    flow->tos = ntohl(tc_flow) >> 4;
     flow->ipv6_label = tc_flow & htonl(IPV6_LABEL_MASK);
     flow->nw_proto = IPPROTO_NONE;
 
@@ -370,7 +370,7 @@ flow_extract(struct ofpbuf *packet, uint32_t priority, ovs_be64 tun_id,
             flow->nw_dst = get_unaligned_be32(&nh->ip_dst);
             flow->nw_proto = nh->ip_proto;
 
-            flow->tos = nh->ip_tos & IP_DSCP_MASK;
+            flow->tos = nh->ip_tos;
             if (IP_IS_FRAGMENT(nh->ip_frag_off)) {
                 flow->frag |= FLOW_FRAG_ANY;
                 if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
@@ -529,8 +529,7 @@ flow_format(struct ds *ds, const struct flow *flow)
 
     if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
         ds_put_format(ds, " label%#"PRIx32" proto%"PRIu8" tos%"PRIu8" ipv6",
-                      ntohl(flow->ipv6_label), flow->nw_proto,
-                      flow->tos & IP_DSCP_MASK);
+                      ntohl(flow->ipv6_label), flow->nw_proto, flow->tos);
         print_ipv6_addr(ds, &flow->ipv6_src);
         ds_put_cstr(ds, "->");
         print_ipv6_addr(ds, &flow->ipv6_dst);
@@ -539,8 +538,7 @@ flow_format(struct ds *ds, const struct flow *flow)
         ds_put_format(ds, " proto%"PRIu8
                           " tos%"PRIu8
                           " ip"IP_FMT"->"IP_FMT,
-                      flow->nw_proto,
-                      flow->tos & IP_DSCP_MASK,
+                      flow->nw_proto, flow->tos,
                       IP_ARGS(&flow->nw_src),
                       IP_ARGS(&flow->nw_dst));
     }
@@ -1020,7 +1018,7 @@ flow_compose(struct ofpbuf *b, const struct flow *flow)
 
         b->l3 = ip = ofpbuf_put_zeros(b, sizeof *ip);
         ip->ip_ihl_ver = IP_IHL_VER(5, 4);
-        ip->ip_tos = flow->tos & IP_DSCP_MASK;
+        ip->ip_tos = flow->tos;
         ip->ip_proto = flow->nw_proto;
         ip->ip_src = flow->nw_src;
         ip->ip_dst = flow->nw_dst;
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index 9657968..c1eb753 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -188,13 +188,20 @@ static const struct mf_field mf_fields[MFF_N_IDS] = {
         MFP_IP_ANY,
         NXM_OF_IP_PROTO,
     }, {
-        MFF_IP_TOS, "nw_tos", NULL,
+        MFF_IP_DSCP, "nw_tos", NULL,
         MF_FIELD_SIZES(u8),
         MFM_NONE, 0,
         MFS_DECIMAL,
         MFP_IP_ANY,
         NXM_OF_IP_TOS,
     }, {
+        MFF_IP_ECN, "nw_ecn", NULL,
+        MF_FIELD_SIZES(u8),
+        MFM_NONE, 0,
+        MFS_DECIMAL,
+        MFP_IP_ANY,
+        NXM_NX_IP_ECN,
+    }, {
         MFF_IP_FRAG, "ip_frag", NULL,
         1, 2,
         MFM_FULLY, 0,
@@ -422,8 +429,10 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc)
     case MFF_IPV6_DST:
         return ipv6_mask_is_any(&wc->ipv6_dst_mask);
 
-    case MFF_IP_TOS:
+    case MFF_IP_DSCP:
         return !(wc->tos_mask & IP_DSCP_MASK);
+    case MFF_IP_ECN:
+        return !(wc->tos_mask & IP_ECN_MASK);
     case MFF_IP_FRAG:
         return !(wc->frag_mask & FLOW_FRAG_MASK);
 
@@ -524,9 +533,12 @@ mf_get_mask(const struct mf_field *mf, const struct flow_wildcards *wc,
         mask->ipv6 = wc->ipv6_dst_mask;
         break;
 
-    case MFF_IP_TOS:
+    case MFF_IP_DSCP:
         mask->u8 = wc->tos_mask & IP_DSCP_MASK;
         break;
+    case MFF_IP_ECN:
+        mask->u8 = wc->tos_mask & IP_ECN_MASK;
+        break;
     case MFF_IP_FRAG:
         mask->u8 = wc->frag_mask & FLOW_FRAG_MASK;
         break;
@@ -642,8 +654,7 @@ mf_are_prereqs_ok(const struct mf_field *mf, const struct flow *flow)
  * all.  For example, the MFF_VLAN_TCI field will never have a nonzero value
  * without the VLAN_CFI bit being set, but we can't reject those values because
  * it is still legitimate to test just for those bits (see the documentation
- * for NXM_OF_VLAN_TCI in nicira-ext.h).  On the other hand, there is never a
- * reason to set the low bit of MFF_IP_TOS to 1, so we reject that. */
+ * for NXM_OF_VLAN_TCI in nicira-ext.h). */
 bool
 mf_is_value_valid(const struct mf_field *mf, const union mf_value *value)
 {
@@ -690,10 +701,10 @@ mf_is_value_valid(const struct mf_field *mf, const union mf_value *value)
     case MFF_ND_TARGET:
     case MFF_ND_SLL:
     case MFF_ND_TLL:
+    case MFF_IP_DSCP:
+    case MFF_IP_ECN:
         return true;
 
-    case MFF_IP_TOS:
-        return !(value->u8 & ~IP_DSCP_MASK);
     case MFF_IP_FRAG:
         return !(value->u8 & ~FLOW_FRAG_MASK);
 
@@ -799,10 +810,14 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow,
         value->u8 = flow->nw_proto;
         break;
 
-    case MFF_IP_TOS:
+    case MFF_IP_DSCP:
         value->u8 = flow->tos & IP_DSCP_MASK;
         break;
 
+    case MFF_IP_ECN:
+        value->u8 = flow->tos & IP_ECN_MASK;
+        break;
+
     case MFF_IP_FRAG:
         value->u8 = flow->frag & FLOW_FRAG_MASK;
         break;
@@ -950,8 +965,12 @@ mf_set_value(const struct mf_field *mf,
         cls_rule_set_nw_proto(rule, value->u8);
         break;
 
-    case MFF_IP_TOS:
-        cls_rule_set_nw_tos(rule, value->u8);
+    case MFF_IP_DSCP:
+        cls_rule_set_nw_dscp(rule, value->u8);
+        break;
+
+    case MFF_IP_ECN:
+        cls_rule_set_nw_ecn(rule, value->u8);
         break;
 
     case MFF_IP_FRAG:
@@ -1117,11 +1136,16 @@ mf_set_wild(const struct mf_field *mf, struct cls_rule *rule)
         rule->flow.nw_proto = 0;
         break;
 
-    case MFF_IP_TOS:
+    case MFF_IP_DSCP:
         rule->wc.tos_mask |= IP_DSCP_MASK;
         rule->flow.tos &= ~IP_DSCP_MASK;
         break;
 
+    case MFF_IP_ECN:
+        rule->wc.tos_mask |= IP_ECN_MASK;
+        rule->flow.tos &= ~IP_ECN_MASK;
+        break;
+
     case MFF_IP_FRAG:
         rule->wc.frag_mask |= FLOW_FRAG_MASK;
         rule->flow.frag &= ~FLOW_FRAG_MASK;
@@ -1201,7 +1225,8 @@ mf_set(const struct mf_field *mf,
     case MFF_VLAN_PCP:
     case MFF_IPV6_LABEL:
     case MFF_IP_PROTO:
-    case MFF_IP_TOS:
+    case MFF_IP_DSCP:
+    case MFF_IP_ECN:
     case MFF_ARP_OP:
     case MFF_ARP_SHA:
     case MFF_ARP_THA:
@@ -1424,8 +1449,12 @@ mf_random_value(const struct mf_field *mf, union mf_value *value)
         value->be32 &= ~htonl(IPV6_LABEL_MASK);
         break;
 
-    case MFF_IP_TOS:
-        value->u8 &= ~0x03;
+    case MFF_IP_DSCP:
+        value->u8 &= IP_DSCP_MASK;
+        break;
+
+    case MFF_IP_ECN:
+        value->u8 &= IP_ECN_MASK;
         break;
 
     case MFF_IP_FRAG:
diff --git a/lib/meta-flow.h b/lib/meta-flow.h
index 6e0134c..42523fc 100644
--- a/lib/meta-flow.h
+++ b/lib/meta-flow.h
@@ -70,7 +70,8 @@ enum mf_field_id {
     MFF_IPV6_LABEL,             /* be32 */
 
     MFF_IP_PROTO,               /* u8 (used for IPv4 or IPv6) */
-    MFF_IP_TOS,                 /* u8 (used for IPv4 or IPv6) */
+    MFF_IP_DSCP,                /* u8 (used for IPv4 or IPv6) */
+    MFF_IP_ECN,                 /* u8 (used for IPv4 or IPv6) */
     MFF_IP_FRAG,                /* u8 (used for IPv4 or IPv6) */
 
     MFF_ARP_OP,                 /* be16 */
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 7568fba..c7ad0d0 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -494,6 +494,10 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr)
             nxm_put_8(b, NXM_OF_IP_TOS, flow->tos & IP_DSCP_MASK);
         }
 
+        if (cr->wc.tos_mask & IP_ECN_MASK) {
+            nxm_put_8(b, NXM_NX_IP_ECN, flow->tos & IP_ECN_MASK);
+        }
+
         if (!(wc & FWW_NW_PROTO)) {
             nxm_put_8(b, NXM_OF_IP_PROTO, flow->nw_proto);
             switch (flow->nw_proto) {
@@ -544,6 +548,10 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr)
             nxm_put_8(b, NXM_OF_IP_TOS, flow->tos & IP_DSCP_MASK);
         }
 
+        if (cr->wc.tos_mask & IP_ECN_MASK) {
+            nxm_put_8(b, NXM_NX_IP_ECN, flow->tos & IP_ECN_MASK);
+        }
+
         if (!(wc & FWW_NW_PROTO)) {
             nxm_put_8(b, NXM_OF_IP_PROTO, flow->nw_proto);
             switch (flow->nw_proto) {
@@ -1049,6 +1057,9 @@ nxm_read_field(const struct nxm_field *src, const struct flow *flow)
     case NFI_NXM_OF_IP_TOS:
         return flow->tos & IP_DSCP_MASK;
 
+    case NFI_NXM_NX_IP_ECN:
+        return flow->tos & IP_ECN_MASK;
+
     case NFI_NXM_NX_IP_FRAG:
         return flow->frag & FLOW_FRAG_MASK;
 
@@ -1204,6 +1215,11 @@ nxm_write_field(const struct nxm_field *dst, struct flow *flow,
         flow->tos |= new_value & IP_DSCP_MASK;
         break;
 
+    case NFI_NXM_NX_IP_ECN:
+        flow->tos &= ~IP_ECN_MASK;
+        flow->tos |= new_value & IP_ECN_MASK;
+        break;
+
     case NFI_NXM_NX_IP_FRAG:
         flow->frag &= ~FLOW_FRAG_MASK;
         flow->frag |= new_value & FLOW_FRAG_MASK;
diff --git a/lib/nx-match.def b/lib/nx-match.def
index 89efdad..af9e629 100644
--- a/lib/nx-match.def
+++ b/lib/nx-match.def
@@ -26,7 +26,7 @@ DEFINE_FIELD_M(OF_ETH_DST,    MFF_ETH_DST,    true)
 DEFINE_FIELD  (OF_ETH_SRC,    MFF_ETH_SRC,    true)
 DEFINE_FIELD  (OF_ETH_TYPE,   MFF_ETH_TYPE,  false)
 DEFINE_FIELD_M(OF_VLAN_TCI,   MFF_VLAN_TCI,   true)
-DEFINE_FIELD  (OF_IP_TOS,     MFF_IP_TOS,     true)
+DEFINE_FIELD  (OF_IP_TOS,     MFF_IP_DSCP,    true)
 DEFINE_FIELD  (OF_IP_PROTO,   MFF_IP_PROTO,  false)
 DEFINE_FIELD_M(OF_IP_SRC,     MFF_IPV4_SRC,   true)
 DEFINE_FIELD_M(OF_IP_DST,     MFF_IPV4_DST,   true)
@@ -45,6 +45,7 @@ DEFINE_FIELD  (NX_ARP_THA,    MFF_ARP_THA,   false)
 DEFINE_FIELD_M(NX_IPV6_SRC,   MFF_IPV6_SRC,  false)
 DEFINE_FIELD_M(NX_IPV6_DST,   MFF_IPV6_DST,  false)
 DEFINE_FIELD  (NX_IPV6_LABEL, MFF_IPV6_LABEL,false)
+DEFINE_FIELD  (NX_IP_ECN,     MFF_IP_ECN,    true)
 /* XXX should we have MFF_ICMPV4_TYPE and MFF_ICMPV6_TYPE? */
 DEFINE_FIELD  (NX_ICMPV6_TYPE,MFF_ICMP_TYPE, false)
 DEFINE_FIELD  (NX_ICMPV6_CODE,MFF_ICMP_CODE, false)
diff --git a/lib/nx-match.h b/lib/nx-match.h
index 42fbed8..97eab9b 100644
--- a/lib/nx-match.h
+++ b/lib/nx-match.h
@@ -102,6 +102,7 @@ nxm_decode_n_bits(ovs_be16 ofs_nbits)
  *  NXM_OF_ETH_TYPE     4       2    --      6
  *  NXM_OF_VLAN_TCI     4       2     2      8
  *  NXM_OF_IP_TOS       4       1    --      5
+ *  NXM_NX_IP_ECN       4       1    --      5
  *  NXM_NX_IP_FRAG      4       1     1      8
  *  NXM_OF_IP_PROTO     4       2    --      6
  *  NXM_OF_IPV6_SRC_W   4      16    16     36
@@ -118,7 +119,7 @@ nxm_decode_n_bits(ovs_be16 ofs_nbits)
  *  NXM_NX_REG_W(4)     4       4     4     12
  *  NXM_NX_TUN_ID_W     4       8     8     20
  *  -------------------------------------------
- *  total                                  264
+ *  total                                  269
  *
  * So this value is conservative.
  */
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 2853bf7..29abce4 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -370,7 +370,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
     case OVS_KEY_ATTR_IPV4:
         ipv4_key = nl_attr_get(a);
         ds_put_format(ds, "ipv4(src="IP_FMT",dst="IP_FMT","
-                      "proto=%"PRId8",tos=%"PRIu8",frag=%s)",
+                      "proto=%"PRId8",tos=0x%"PRIx8",frag=%s)",
                       IP_ARGS(&ipv4_key->ipv4_src),
                       IP_ARGS(&ipv4_key->ipv4_dst),
                       ipv4_key->ipv4_proto, ipv4_key->ipv4_tos,
@@ -386,7 +386,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
         inet_ntop(AF_INET6, ipv6_key->ipv6_dst, dst_str, sizeof dst_str);
 
         ds_put_format(ds, "ipv6(src=%s,dst=%s,label=0x%"PRIx32",proto=%"PRId8
-                      ",tos=%"PRIu8",frag=%s)",
+                      ",tos=0x%"PRIx8",frag=%s)",
                       src_str, dst_str, ntohl(ipv6_key->ipv6_label),
                       ipv6_key->ipv6_proto, ipv6_key->ipv6_tos,
                       ovs_frag_type_to_string(ipv6_key->ipv6_frag));
@@ -876,7 +876,7 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
         ipv4_key->ipv4_src = flow->nw_src;
         ipv4_key->ipv4_dst = flow->nw_dst;
         ipv4_key->ipv4_proto = flow->nw_proto;
-        ipv4_key->ipv4_tos = flow->tos & IP_DSCP_MASK;
+        ipv4_key->ipv4_tos = flow->tos;
         ipv4_key->ipv4_frag = ovs_to_odp_frag(flow->frag);
     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
         struct ovs_key_ipv6 *ipv6_key;
@@ -888,7 +888,7 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
         memcpy(ipv6_key->ipv6_dst, &flow->ipv6_dst, sizeof ipv6_key->ipv6_dst);
         ipv6_key->ipv6_label = flow->ipv6_label;
         ipv6_key->ipv6_proto = flow->nw_proto;
-        ipv6_key->ipv6_tos = flow->tos & IP_DSCP_MASK;
+        ipv6_key->ipv6_tos = flow->tos;
         ipv6_key->ipv6_frag = ovs_to_odp_frag(flow->frag);
     } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
         struct ovs_key_arp *arp_key;
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 0146a33..c627216 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -363,6 +363,10 @@ parse_named_action(enum ofputil_action_code code, const struct flow *flow,
     case OFPUTIL_NXAST_EXIT:
         ofputil_put_NXAST_EXIT(b);
         break;
+
+    case OFPUTIL_NXAST_SET_NW_ECN:
+        ofputil_put_NXAST_SET_NW_ECN(b)->nw_ecn = str_to_u32(arg);
+        break;
     }
 }
 
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index fe852b4..fdda13b 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -183,6 +183,7 @@ ofp_print_action(struct ds *s, const union ofp_action *a,
     const struct nx_action_multipath *nam;
     const struct nx_action_autopath *naa;
     const struct nx_action_output_reg *naor;
+    const struct nx_action_set_nw_ecn *nasecn;
     uint16_t port;
 
     switch (code) {
@@ -342,6 +343,11 @@ ofp_print_action(struct ds *s, const union ofp_action *a,
         ds_put_cstr(s, "exit");
         break;
 
+    case OFPUTIL_NXAST_SET_NW_ECN:
+        nasecn = (const struct nx_action_set_nw_ecn *) a;
+        ds_put_format(s, "mod_nw_ecn:%d", nasecn->nw_ecn);
+        break;
+
     default:
         break;
     }
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 05e3cc4..ad2964b 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -897,6 +897,11 @@ ofputil_min_flow_format(const struct cls_rule *rule)
         return NXFF_NXM;
     }
 
+    /* Only NXM supports matching IP ECN bits. */
+    if (wc->tos_mask & IP_ECN_MASK) {
+        return NXFF_NXM;
+    }
+
     /* Other formats can express this rule. */
     return NXFF_OPENFLOW10;
 }
@@ -2259,6 +2264,7 @@ validate_actions(const union ofp_action *actions, size_t n_actions,
         case OFPUTIL_NXAST_NOTE:
         case OFPUTIL_NXAST_SET_TUNNEL64:
         case OFPUTIL_NXAST_EXIT:
+        case OFPUTIL_NXAST_SET_NW_ECN:
             break;
         }
 
diff --git a/lib/ofp-util.def b/lib/ofp-util.def
index 2958eb6..ac01d94 100644
--- a/lib/ofp-util.def
+++ b/lib/ofp-util.def
@@ -36,4 +36,5 @@ NXAST_ACTION(NXAST_RESUBMIT_TABLE, nx_action_resubmit,     0, NULL)
 NXAST_ACTION(NXAST_OUTPUT_REG,     nx_action_output_reg,   0, NULL)
 NXAST_ACTION(NXAST_LEARN,          nx_action_learn,        1, "learn")
 NXAST_ACTION(NXAST_EXIT,           nx_action_header,       0, "exit")
+NXAST_ACTION(NXAST_SET_NW_ECN,     nx_action_set_nw_ecn,   0, "mod_nw_ecn")
 #undef NXAST_ACTION
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 66852e9..8176eed 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3680,7 +3680,7 @@ commit_set_nw_action(const struct flow *flow, struct flow *base,
     ipv4_key.ipv4_src = base->nw_src = flow->nw_src;
     ipv4_key.ipv4_dst = base->nw_dst = flow->nw_dst;
     ipv4_key.ipv4_proto = base->nw_proto;
-    ipv4_key.ipv4_tos = flow->tos & IP_DSCP_MASK;
+    ipv4_key.ipv4_tos = flow->tos;
     ipv4_key.ipv4_frag = (frag == 0 ? OVS_FRAG_TYPE_NONE
                           : frag == FLOW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST
                           : OVS_FRAG_TYPE_LATER);
@@ -4120,6 +4120,7 @@ do_xlate_actions(const union ofp_action *in, size_t n_in,
         const struct nx_action_autopath *naa;
         const struct nx_action_bundle *nab;
         const struct nx_action_output_reg *naor;
+        const struct nx_action_set_nw_ecn *nasecn;
         enum ofputil_action_code code;
         ovs_be64 tun_id;
 
@@ -4266,6 +4267,12 @@ do_xlate_actions(const union ofp_action *in, size_t n_in,
         case OFPUTIL_NXAST_EXIT:
             ctx->exit = true;
             break;
+
+        case OFPUTIL_NXAST_SET_NW_ECN:
+            nasecn = (const struct nx_action_set_nw_ecn *) ia;
+            ctx->flow.tos &= ~IP_ECN_MASK;
+            ctx->flow.tos |= nasecn->nw_ecn & IP_ECN_MASK;
+            break;
         }
     }
 
diff --git a/tests/odp.at b/tests/odp.at
index 9da93c6..edbd401 100644
--- a/tests/odp.at
+++ b/tests/odp.at
@@ -4,22 +4,24 @@ AT_SETUP([OVS datapath parsing and formatting - valid forms])
 AT_DATA([odp-base.txt], [dnl
 in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15)
 in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x1234)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=no)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=first)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=later)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0,frag=no),tcp(src=80,dst=8080)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,frag=no),udp(src=81,dst=6632)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0,frag=no),icmp(type=1,code=2)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=10,tos=112,frag=no)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=10,tos=112,frag=first)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=10,tos=112,frag=later)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=6,tos=0,frag=no),tcp(src=80,dst=8080)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=17,tos=0,frag=no),udp(src=6630,dst=22)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0,frag=no),icmpv6(type=1,code=2)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0,frag=no),icmpv6(type=135,code=0),nd(target=::3)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0,frag=no),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0,frag=no),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x80,frag=no)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x81,frag=no)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x80,frag=first)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x80,frag=later)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0x0,frag=no),tcp(src=80,dst=8080)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0x0,frag=no),udp(src=81,dst=6632)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0x0,frag=no),icmp(type=1,code=2)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=10,tos=0x70,frag=no)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=10,tos=0x71,frag=no)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=10,tos=0x70,frag=first)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=10,tos=0x70,frag=later)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=6,tos=0x0,frag=no),tcp(src=80,dst=8080)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=17,tos=0x0,frag=no),udp(src=6630,dst=22)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0x0,frag=no),icmpv6(type=1,code=2)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0x0,frag=no),icmpv6(type=135,code=0),nd(target=::3)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0x0,frag=no),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0x0,frag=no),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0x0,proto=58,tos=0x0,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
 in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18)
 ])
 
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index 23a4d8c..d127215 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -211,8 +211,14 @@ NXM_OF_VLAN_TCI_W(0000/e000)    # No 802.1Q or with VID=0
 
 # IP TOS
 NXM_OF_ETH_TYPE(0800) NXM_OF_IP_TOS(f0)
+NXM_OF_ETH_TYPE(0800) NXM_OF_IP_TOS(41)  # The ECN bits should be stripped.
 NXM_OF_IP_TOS(f0)
 
+# IP ECN
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_ECN(03)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_ECN(06)  # The DSCP bits should be stripped.
+NXM_NX_IP_ECN(03)
+
 # IP protocol
 NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(01)
 NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(05)
@@ -389,6 +395,12 @@ NXM_OF_VLAN_TCI_W(0000/e000)
 
 # IP TOS
 NXM_OF_ETH_TYPE(0800), NXM_OF_IP_TOS(f0)
+NXM_OF_ETH_TYPE(0800), NXM_OF_IP_TOS(40)
+nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_PREREQ)
+
+# IP ECN
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_ECN(03)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_ECN(02)
 nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_PREREQ)
 
 # IP protocol
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index 2bf3740..f62ce18 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -408,6 +408,14 @@ When \fBdl_type\fR is wildcarded or set to a value other than 0x0800 or
 0x86dd, the value of \fBnw_tos\fR is ignored (see \fBFlow Syntax\fR
 above).
 .
+.IP \fBnw_ecn=\fIecn\fR
+Matches the ECN bits in the IPv4 ToS or IPv6 traffic class field.  \fIecn\fR
+is specified as a decimal number between 0 and 3, inclusive.
+.IP
+When \fBdl_type\fR is wildcarded or set to a value other than 0x0800 or
+0x86dd, the value of \fBnw_ecn\fR is ignored (see \fBFlow Syntax\fR
+above).
+.
 .IP \fBtp_src=\fIport\fR
 .IQ \fBtp_dst=\fIport\fR
 When \fBdl_type\fR and \fBnw_proto\fR specify TCP or UDP, \fBtp_src\fR
@@ -717,6 +725,10 @@ Sets the IP ToS/DSCP field to \fItos\fR.  Valid values are between 0 and
 255, inclusive.  Note that the two lower reserved bits are never
 modified.
 .
+.IP \fBmod_nw_ecn\fB:\fIecn\fR
+Sets the IP ECN field to \fIecn\fR.  Valid values are between 0 and 3,
+inclusive.
+.
 .RE
 .IP
 The following actions are Nicira vendor extensions that, as of this writing, are
-- 
1.7.1




More information about the dev mailing list