[ovs-dev] [PATCH] Implement new fragment handling policy.

Ben Pfaff blp at nicira.com
Wed Oct 19 22:19:38 UTC 2011


On Mon, Oct 17, 2011 at 09:19:44PM -0700, Jesse Gross wrote:
> On Tue, Oct 11, 2011 at 4:33 PM, Ben Pfaff <blp at nicira.com> wrote:
> > diff --git a/datapath/flow.c b/datapath/flow.c
> > index 7322295..9a9b0aa 100644
> > --- a/datapath/flow.c
> > +++ b/datapath/flow.c
> > + * Correct behavior when there's more than one fragment header is anybody's
> > + * guess.  This version reports whether the final fragment header is a first
> > + * fragment.
> 
> I thought about this a bit more and I think what you have here is correct.
> 
> According to RFC 2460, this type of parsing is illegal because you're
> supposed to fully process each extension header as a protocol layer
> before moving on to the next instead of skipping through them.  This
> means that end hosts (which are all that should matter here since
> routers do not do fragmentation and reassembly) will actually do
> iterative reassembly in the case of multiple fragment headers.

Wow.  I never considered that possibility.  Looking at the Linux IPv6
implementation, I think that's what really happens in practice, too.

Your analysis (snipped) is very complete.  Do you want any of it in
the comment?  For now I just changed this to:

 * When there is more than one fragment header, this version reports whether
 * the final fragment header that it examines is a first fragment.

> > +static int skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
> > +                      u8 *tos_frag)
> [...]
> > +                       if (ntohs(*fp) & ~0x7) {
> > +                               *tos_frag |= OVS_FRAG_TYPE_FIRST;
> > +                               break;
> > +                       }
> > +                       *tos_frag |= OVS_FRAG_TYPE_LATER;
> 
> Aren't these two cases (FIRST and LATER) reversed?

Oops.  Yes, you're right.

I didn't test the IPv6 support before.  Now I have.  It seems to work:
using a flow that just drops IPv6 later fragments, I see only first
fragments and non-fragments coming through the switch.

Applied:

@@ -163,10 +162,10 @@ static int skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
 
 			*tos_frag &= ~OVS_FRAG_TYPE_MASK;
 			if (ntohs(*fp) & ~0x7) {
-				*tos_frag |= OVS_FRAG_TYPE_FIRST;
+				*tos_frag |= OVS_FRAG_TYPE_LATER;
 				break;
 			}
-			*tos_frag |= OVS_FRAG_TYPE_LATER;
+			*tos_frag |= OVS_FRAG_TYPE_FIRST;
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH)
 			hdrlen = (hp->hdrlen+2)<<2;

> > @@ -140,11 +202,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
> >        payload_ofs = (u8 *)(nh + 1) - skb->data;
> >
> >        key->ip.proto = NEXTHDR_NONE;
> > -       key->ip.tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
> > +       key->ip.tos_frag = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
> >        ipv6_addr_copy(&key->ipv6.addr.src, &nh->saddr);
> >        ipv6_addr_copy(&key->ipv6.addr.dst, &nh->daddr);
> >
> > -       payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr);
> > +       payload_ofs = skip_exthdr(skb, payload_ofs, &nexthdr, &key->ip.tos_frag);
> 
> I think we're missing the UDP GSO case here to mark it as OVS_FRAG_TYPE_FIRST.

TCP GSO is broken into SKB_GSO_TCPV4 and SKB_GSO_TCPV6.  UDP GSO is
SKB_GSO_UDP for both v4 and v6, right?

Applied:

@@ -758,6 +757,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 			goto out;
 		}
 
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST;
+
 		/* Transport layer. */
 		if (key->ip.proto == NEXTHDR_TCP) {
 			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);

> > diff --git a/datapath/flow.h b/datapath/flow.h
> > index ae12fe4..31a02fa 100644
> > --- a/datapath/flow.h
> > +++ b/datapath/flow.h
> > @@ -29,6 +29,10 @@ struct sw_flow_actions {
> >        struct nlattr actions[];
> >  };
> >
> > +/* Mask for the OVS_FRAG_TYPE_* value in the low 2 bits of ip.tos_frag in
> > + * struct sw_flow_key. */
> > +#define OVS_FRAG_TYPE_MASK 3
>
> What if we defined this in terms of INET_ECN_MASK?  That seems a
> little clearer.

OK.  (I didn't do it to start off because they aren't actually ECN
bits.)

Applied:

--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -20,6 +20,7 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/flex_array.h>
+#include <net/inet_ecn.h>
 
 struct sk_buff;
 
@@ -31,7 +32,7 @@ struct sw_flow_actions {
 
 /* Mask for the OVS_FRAG_TYPE_* value in the low 2 bits of ip.tos_frag in
  * struct sw_flow_key. */
-#define OVS_FRAG_TYPE_MASK 3
+#define OVS_FRAG_TYPE_MASK INET_ECN_MASK
 
 struct sw_flow_key {
 	struct {

> > diff --git a/lib/nx-match.c b/lib/nx-match.c
> > index beaed3d..49aaef9 100644
> > --- a/lib/nx-match.c
> > +++ b/lib/nx-match.c
> > +static void
> > +nxm_put_tos_frag(struct ofpbuf *b, const struct cls_rule *cr)
> > +{
> > +    uint8_t tos_frag = cr->flow.tos_frag;
> > +    uint8_t tos_frag_mask = cr->wc.tos_frag_mask;
> > +
> > +    if (tos_frag_mask & IP_DSCP_MASK) {
> > +        nxm_put_8(b, NXM_OF_IP_TOS, tos_frag & IP_DSCP_MASK);
> > +    }
> > +    if (tos_frag_mask & FLOW_FRAG_MASK) {
> > +        uint8_t value, mask;
> > +
> > +        value = mask = 0;
> > +        if (tos_frag_mask & FLOW_FRAG_ANY) {
> > +            mask |= 1;
> > +            if (tos_frag & FLOW_FRAG_ANY) {
> > +                value |= 1;
> > +            }
> > +        }
> > +        if (tos_frag_mask & FLOW_FRAG_FIRST) {
> > +            mask |= 2;
> > +            if (tos_frag & FLOW_FRAG_FIRST) {
> > +                value |= 2;
> > +            }
> > +        }
> > +
> > +        if (mask == 3) {
> > +            mask = UINT8_MAX;
> > +        }
> 
> I'm not sure why we use explicit values here instead of symbolic
> constants.  I believe that they have the same value and we use the
> constants on the parse side.

I was trying to keep the FLOW_* constants independent of the values in
nicira-ext.h, but as you point out they leaked anyway.

OK, now I've declared symbolic values for those bits in nicira-ext.h,
asserted that they're the same as the FLOW_FRAG_ bits, and just use
them directly in nx-match.c too:

diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index ff045e2..36fdee3 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -1550,6 +1550,9 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
  *   - A packet that is an IP fragment with nonzero offset has only bit 0 set
  *     and thus value 1.
  *
+ * NX_IP_FRAG_ANY and NX_IP_FRAG_FIRST are declared to symbolically represent
+ * the meanings of bits 0 and 1.
+ *
  * The switch may reject matches against values that can never appear.
  *
  * It is important to understand how this field interacts with the OpenFlow IP
@@ -1574,6 +1577,9 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
 #define NXM_NX_IP_FRAG     NXM_HEADER  (0x0001, 26, 1)
 #define NXM_NX_IP_FRAG_W   NXM_HEADER_W(0x0001, 26, 1)
 
+/* Bits in the value of NXM_NX_IP_FRAG. */
+#define NX_IP_FRAG_ANY   (1 << 0) /* Is this a fragment? */
+#define NX_IP_FRAG_FIRST (1 << 1) /* Is this a first fragment? */
 
 /* ## --------------------- ## */
 /* ## Requests and replies. ## */
diff --git a/lib/flow.h b/lib/flow.h
index b890507..e3d19bc 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -49,6 +49,9 @@ BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
 #define FLOW_FRAG_FIRST (1 << 1) /* Set for IP fragments with offset 0. */
 #define FLOW_FRAG_MASK  (FLOW_FRAG_ANY | FLOW_FRAG_FIRST)
 
+BUILD_ASSERT_DECL(FLOW_FRAG_ANY == NX_IP_FRAG_ANY);
+BUILD_ASSERT_DECL(FLOW_FRAG_FIRST == NX_IP_FRAG_FIRST);
+
 struct flow {
     ovs_be64 tun_id;            /* Encapsulating tunnel ID. */
     uint32_t regs[FLOW_N_REGS]; /* Registers. */
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 789f5a4..84a14de 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -430,28 +430,20 @@ nxm_put_tos_frag(struct ofpbuf *b, const struct cls_rule *cr)
     if (tos_frag_mask & IP_DSCP_MASK) {
         nxm_put_8(b, NXM_OF_IP_TOS, tos_frag & IP_DSCP_MASK);
     }
-    if (tos_frag_mask & FLOW_FRAG_MASK) {
-        uint8_t value, mask;
-
-        value = mask = 0;
-        if (tos_frag_mask & FLOW_FRAG_ANY) {
-            mask |= 1;
-            if (tos_frag & FLOW_FRAG_ANY) {
-                value |= 1;
-            }
-        }
-        if (tos_frag_mask & FLOW_FRAG_FIRST) {
-            mask |= 2;
-            if (tos_frag & FLOW_FRAG_FIRST) {
-                value |= 2;
-            }
-        }
 
-        if (mask == 3) {
-            mask = UINT8_MAX;
-        }
+    switch (tos_frag_mask & FLOW_FRAG_MASK) {
+    case 0:
+        break;
+
+    case FLOW_FRAG_MASK:
+        /* Output it as exact-match even though only the low 2 bits matter. */
+        nxm_put_8(b, NXM_NX_IP_FRAG, tos_frag & FLOW_FRAG_MASK);
+        break;
 
-        nxm_put_8m(b, NXM_NX_IP_FRAG, value, mask);
+    default:
+        nxm_put_8m(b, NXM_NX_IP_FRAG, tos_frag & FLOW_FRAG_MASK,
+                   tos_frag_mask & FLOW_FRAG_MASK);
+        break;
     }
 }
 
 
 /* Appends to 'b' the nx_match format that expresses 'cr' (except for

> One general thing that occurs to me is that the not_first option is
> not particularly useful and that a not fragmented/first fragment
> combination would be vastly more interesting (by flipping the first
> bit to be later).  I'm not sure how much it matters though because you
> can get pretty much the same information by looking at the L4 fields
> and exactly the same thing by using multiple flows.

OK, good point.  I applied the following:

diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index 36fdee3..aeb1a31 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -1545,12 +1545,12 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
  *   - A packet that is not an IP fragment has value 0.
  *
  *   - A packet that is an IP fragment with offset 0 (the first fragment) has
- *     bits 0 and 1 set and thus value 3.
+ *     bit 0 set and thus value 1.
  *
- *   - A packet that is an IP fragment with nonzero offset has only bit 0 set
- *     and thus value 1.
+ *   - A packet that is an IP fragment with nonzero offset has bits 0 and 1 set
+ *     and thus value 3.
  *
- * NX_IP_FRAG_ANY and NX_IP_FRAG_FIRST are declared to symbolically represent
+ * NX_IP_FRAG_ANY and NX_IP_FRAG_LATER are declared to symbolically represent
  * the meanings of bits 0 and 1.
  *
  * The switch may reject matches against values that can never appear.
@@ -1579,7 +1579,7 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
 
 /* Bits in the value of NXM_NX_IP_FRAG. */
 #define NX_IP_FRAG_ANY   (1 << 0) /* Is this a fragment? */
-#define NX_IP_FRAG_FIRST (1 << 1) /* Is this a first fragment? */
+#define NX_IP_FRAG_LATER (1 << 1) /* Is this a fragment with nonzero offset? */
 
 /* ## --------------------- ## */
 /* ## Requests and replies. ## */
diff --git a/lib/classifier.c b/lib/classifier.c
index d3632a3..0335f13 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -614,21 +614,21 @@ cls_rule_format(const struct cls_rule *rule, struct ds *s)
         ds_put_format(s, "nw_tos=%"PRIu8",", f->tos_frag & IP_DSCP_MASK);
     }
     switch (wc->tos_frag_mask & FLOW_FRAG_MASK) {
-    case FLOW_FRAG_ANY | FLOW_FRAG_FIRST:
+    case FLOW_FRAG_ANY | FLOW_FRAG_LATER:
         ds_put_format(s, "frag=%s,",
                       f->tos_frag & FLOW_FRAG_ANY
-                      ? (f->tos_frag & FLOW_FRAG_FIRST ? "first" : "later")
-                      : (f->tos_frag & FLOW_FRAG_FIRST ? "<error>" : "no"));
+                      ? (f->tos_frag & FLOW_FRAG_LATER ? "later" : "first")
+                      : (f->tos_frag & FLOW_FRAG_LATER ? "no" : "<error>"));
         break;
 
     case FLOW_FRAG_ANY:
         ds_put_format(s, "frag=%s,",
-                      f->tos_frag & FLOW_FRAG_ANY ? "first|later" : "no");
+                      f->tos_frag & FLOW_FRAG_ANY ? "yes" : "no");
         break;
 
-    case FLOW_FRAG_FIRST:
+    case FLOW_FRAG_LATER:
         ds_put_format(s, "frag=%s,",
-                      f->tos_frag & FLOW_FRAG_FIRST ? "first" : "no|later");
+                      f->tos_frag & FLOW_FRAG_LATER ? "later" : "not_later");
         break;
     }
     if (f->nw_proto == IPPROTO_ICMP) {
diff --git a/lib/flow.c b/lib/flow.c
index b0fa77a..06cc822 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -204,10 +204,10 @@ parse_ipv6(struct ofpbuf *packet, struct flow *flow)
             flow->tos_frag &= ~FLOW_FRAG_MASK;
             flow->tos_frag |= FLOW_FRAG_ANY;
             if ((frag_hdr->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) {
+                flow->tos_frag |= FLOW_FRAG_LATER;
                 nexthdr = IPPROTO_FRAGMENT;
                 break;
             }
-            flow->tos_frag |= FLOW_FRAG_FIRST;
         }
     }
 
@@ -371,8 +371,8 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port,
             flow->tos_frag = nh->ip_tos & IP_DSCP_MASK;
             if (IP_IS_FRAGMENT(nh->ip_frag_off)) {
                 flow->tos_frag |= FLOW_FRAG_ANY;
-                if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) {
-                    flow->tos_frag |= FLOW_FRAG_FIRST;
+                if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
+                    flow->tos_frag |= FLOW_FRAG_LATER;
                 }
             }
 
@@ -533,8 +533,8 @@ flow_format(struct ds *ds, const struct flow *flow)
     frag = flow->tos_frag & FLOW_FRAG_MASK;
     if (frag) {
         ds_put_format(ds, " frag(%s)",
-                      frag == FLOW_FRAG_ANY ? "later"
-                      : frag == (FLOW_FRAG_ANY | FLOW_FRAG_FIRST) ? "first"
+                      frag == FLOW_FRAG_ANY ? "first"
+                      : frag == (FLOW_FRAG_ANY | FLOW_FRAG_LATER) ? "later"
                       : "<error>");
     }
     if (flow->tp_src || flow->tp_dst) {
@@ -1009,12 +1009,12 @@ flow_compose(struct ofpbuf *b, const struct flow *flow)
 
         if (flow->tos_frag & FLOW_FRAG_ANY) {
             ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
-            if (!(flow->tos_frag & FLOW_FRAG_FIRST)) {
+            if (flow->tos_frag & FLOW_FRAG_LATER) {
                 ip->ip_frag_off |= htons(100);
             }
         }
         if (!(flow->tos_frag & FLOW_FRAG_ANY)
-            || flow->tos_frag & FLOW_FRAG_FIRST) {
+            || !(flow->tos_frag & FLOW_FRAG_LATER)) {
             if (flow->nw_proto == IPPROTO_TCP) {
                 struct tcp_header *tcp;
 
diff --git a/lib/flow.h b/lib/flow.h
index e3d19bc..e9da2ad 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -46,11 +46,11 @@ BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
 
 /* Fragment bits, used for IPv4 and IPv6, always zero for non-IP flows. */
 #define FLOW_FRAG_ANY   (1 << 0) /* Set for any IP fragment. */
-#define FLOW_FRAG_FIRST (1 << 1) /* Set for IP fragments with offset 0. */
-#define FLOW_FRAG_MASK  (FLOW_FRAG_ANY | FLOW_FRAG_FIRST)
+#define FLOW_FRAG_LATER (1 << 1) /* Set for IP fragment with nonzero offset. */
+#define FLOW_FRAG_MASK  (FLOW_FRAG_ANY | FLOW_FRAG_LATER)
 
 BUILD_ASSERT_DECL(FLOW_FRAG_ANY == NX_IP_FRAG_ANY);
-BUILD_ASSERT_DECL(FLOW_FRAG_FIRST == NX_IP_FRAG_FIRST);
+BUILD_ASSERT_DECL(FLOW_FRAG_LATER == NX_IP_FRAG_LATER);
 
 struct flow {
     ovs_be64 tun_id;            /* Encapsulating tunnel ID. */
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index e56f3f2..0f00996 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -1572,20 +1572,20 @@ struct frag_handling {
 
 static const struct frag_handling all_frags[] = {
 #define A FLOW_FRAG_ANY
-#define F FLOW_FRAG_FIRST
+#define L FLOW_FRAG_LATER
     /* name               mask  value */
 
-    { "no",               A|F,  0     },
-    { "first",            A|F,  A|F   },
-    { "later",            A|F,  A     },
+    { "no",               A|L,  0     },
+    { "first",            A|L,  A     },
+    { "later",            A|L,  A|L   },
 
     { "no",               A,    0     },
     { "yes",              A,    A     },
 
-    { "not_first",        F,    0     },
-    { "first",            F,    F     },
+    { "not_later",        L,    0     },
+    { "later",            L,    L     },
 #undef A
-#undef F
+#undef L
 };
 
 static char *
diff --git a/lib/meta-flow.h b/lib/meta-flow.h
index 8a9fa64..696b8ca 100644
--- a/lib/meta-flow.h
+++ b/lib/meta-flow.h
@@ -144,7 +144,7 @@ enum mf_string {
     MFS_IPV4,
     MFS_IPV6,
     MFS_OFP_PORT,               /* An OpenFlow port number or name. */
-    MFS_FRAG                    /* no, yes, first, later, not_first */
+    MFS_FRAG                    /* no, yes, first, later, not_later */
 };
 
 struct mf_field {
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 6d2401a..a7a6a1b 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -835,8 +835,8 @@ odp_flow_key_from_string(const char *s, struct ofpbuf *key)
 static uint8_t
 tos_frag_to_odp_frag(uint8_t tos_frag)
 {
-    return (tos_frag & FLOW_FRAG_FIRST ? OVS_FRAG_TYPE_FIRST
-            : tos_frag & FLOW_FRAG_ANY ? OVS_FRAG_TYPE_LATER
+    return (tos_frag & FLOW_FRAG_LATER ? OVS_FRAG_TYPE_LATER
+            : tos_frag & FLOW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST
             : OVS_FRAG_TYPE_NONE);
 }
 
@@ -912,7 +912,7 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
 
     if ((flow->dl_type == htons(ETH_TYPE_IP)
          || flow->dl_type == htons(ETH_TYPE_IPV6))
-        && (flow->tos_frag & FLOW_FRAG_MASK) != FLOW_FRAG_ANY) {
+        && !(flow->tos_frag & FLOW_FRAG_LATER)) {
 
         if (flow->nw_proto == IPPROTO_TCP) {
             struct ovs_key_tcp *tcp_key;
@@ -970,8 +970,8 @@ odp_to_tos_frag(uint8_t odp_tos, uint8_t odp_frag, struct flow *flow)
     flow->tos_frag = odp_tos;
     if (odp_frag != OVS_FRAG_TYPE_NONE) {
         flow->tos_frag |= FLOW_FRAG_ANY;
-        if (odp_frag == OVS_FRAG_TYPE_FIRST) {
-            flow->tos_frag |= FLOW_FRAG_FIRST;
+        if (odp_frag == OVS_FRAG_TYPE_LATER) {
+            flow->tos_frag |= FLOW_FRAG_LATER;
         }
     }
     return true;
@@ -985,7 +985,6 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
 {
     const struct nlattr *nla;
     enum ovs_key_attr prev_type;
-    uint8_t frag_type;
     size_t left;
 
     memset(flow, 0, sizeof *flow);
@@ -1156,7 +1155,6 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
         return EINVAL;
     }
 
-    frag_type = flow->tos_frag & FLOW_FRAG_MASK;
     switch (prev_type) {
     case OVS_KEY_ATTR_UNSPEC:
         return EINVAL;
@@ -1178,7 +1176,7 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
         return 0;
 
     case OVS_KEY_ATTR_IPV4:
-        if (frag_type == FLOW_FRAG_ANY) {
+        if (flow->tos_frag & FLOW_FRAG_LATER) {
             return 0;
         }
         if (flow->nw_proto == IPPROTO_TCP
@@ -1189,7 +1187,7 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
         return 0;
 
     case OVS_KEY_ATTR_IPV6:
-        if (frag_type == FLOW_FRAG_ANY) {
+        if (flow->tos_frag & FLOW_FRAG_LATER) {
             return 0;
         }
         if (flow->nw_proto == IPPROTO_TCP
@@ -1202,7 +1200,7 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
     case OVS_KEY_ATTR_ICMPV6:
         if (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT)
             || flow->tp_src == htons(ND_NEIGHBOR_ADVERT)
-            || frag_type == FLOW_FRAG_ANY) {
+            || flow->tos_frag & FLOW_FRAG_LATER) {
             return EINVAL;
         }
         return 0;
@@ -1211,7 +1209,7 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
     case OVS_KEY_ATTR_UDP:
     case OVS_KEY_ATTR_ICMP:
     case OVS_KEY_ATTR_ND:
-        if (frag_type == FLOW_FRAG_ANY) {
+        if (flow->tos_frag & FLOW_FRAG_LATER) {
             return EINVAL;
         }
         return 0;
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 58fc18e..536a213 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -2954,7 +2954,7 @@ rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
     }
 
     cls = &ofproto->up.tables[table_id];
-    if (flow->tos_frag & FLOW_FRAG_FIRST
+    if ((flow->tos_frag & FLOW_FRAG_MASK) == FLOW_FRAG_ANY
         && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
         /* For OFPC_NORMAL frag_handling, we must pretend that transport ports
          * are unavailable. */
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index c527467..215f8f9 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -522,8 +522,8 @@ Matches all fragments.
 Matches only fragments with offset 0.
 .IP "\fBlater\fR"
 Matches only fragments with nonzero offset.
-.IP "\fBnot_first\fR"
-Matches non-fragmented packets and fragments with nonzero offset.
+.IP "\fBnot_later\fR"
+Matches non-fragmented packets and fragments with zero offset.
 .RE
 .IP
 The \fBip_frag\fR match type is likely to be most useful in

> Otherwise, I don't really have any other comments on the userspace
> portions.  I did look through all of it, although I'm not sure how
> effectively, especially in the meta flow related portions.  Hopefully
> most of it would be caught by the compiler through the build assertions
> and name changes, though.

Yes, I hope so too.

Here's the full revised patch.

--8<--------------------------cut here-------------------------->8--

From: Ben Pfaff <blp at nicira.com>
Date: Wed, 19 Oct 2011 15:17:57 -0700
Subject: [PATCH] Implement new fragment handling policy.

Until now, OVS has handled IP fragments more awkwardly than necessary.  It
has not been possible to match on L4 headers, even in fragments with offset
0 where they are actually present.  This means that there has been no way to
implement ACLs that treat, say, different TCP ports differently, on
fragmented traffic; instead, all decisions for fragment forwarding had to
be made on the basis of L2 and L3 headers alone.

This commit improves the situation significantly.  It is still not possible
to match on L4 headers in fragments with nonzero offset, because that
information is simply not present in such fragments, but this commit adds
the ability to match on L4 headers for fragments with zero offset.  This
means that it becomes possible to implement ACLs that drop such "first
fragments" on the basis of L4 headers.  In practice, that effectively
blocks even fragmented traffic on an L4 basis, because the receiving IP
stack cannot reassemble a full packet when the first fragment is missing.

This commit works by adding a new "fragment type" to the kernel flow match
and making it available through OpenFlow as a new NXM field named
NXM_NX_IP_FRAG.  Because OpenFlow 1.0 explicitly says that the L4 fields
are always 0 for IP fragments, this commit also adds a new OpenFlow
fragment handling mode, OFPC_FRAG_NX_MATCH, that fills in the L4 fields
for "first fragments".  It also enhances ovs-ofctl to allow users to
configure this new fragment handling mode and to parse the new field.

Signed-off-by: Ben Pfaff <blp at nicira.com>
Bug #7557.
---
 NEWS                          |    8 ++
 datapath/datapath.c           |   42 ++-----------
 datapath/datapath.h           |    5 --
 datapath/flow.c               |  144 +++++++++++++++++++++++++++++++++-------
 datapath/flow.h               |   10 ++-
 datapath/tunnel.c             |    5 +-
 include/linux/openvswitch.h   |   38 ++++++-----
 include/openflow/nicira-ext.h |   49 ++++++++++++++
 include/openflow/openflow.h   |    1 +
 lib/classifier.c              |   50 ++++++++++++--
 lib/classifier.h              |    2 +
 lib/dpif-linux.c              |   40 -----------
 lib/dpif-netdev.c             |   28 +--------
 lib/dpif-provider.h           |   10 ---
 lib/dpif.c                    |   27 --------
 lib/dpif.h                    |    3 -
 lib/flow.c                    |  117 +++++++++++++++++++++-------------
 lib/flow.h                    |   37 ++++++----
 lib/meta-flow.c               |  124 +++++++++++++++++++++++++++++++++--
 lib/meta-flow.h               |    4 +-
 lib/nx-match.c                |   70 +++++++++++++++++---
 lib/nx-match.def              |    1 +
 lib/odp-util.c                |  123 +++++++++++++++++++++++++++++------
 lib/ofp-print.c               |   18 +----
 lib/ofp-util.c                |   60 ++++++++++++++----
 lib/ofp-util.h                |    3 +
 ofproto/netflow.c             |    2 +-
 ofproto/ofproto-dpif.c        |   73 +++++++++++++++------
 ofproto/ofproto-provider.h    |   40 +++++++++---
 ofproto/ofproto.c             |   38 +++++------
 tests/flowgen.pl              |   25 ++++----
 tests/odp.at                  |   38 +++++++----
 tests/ofproto-dpif.at         |   49 +++++++++++++-
 tests/ovs-ofctl.at            |   52 +++++++++++++++
 tests/test-classifier.c       |   19 ++++--
 tests/test-flows.c            |    3 +
 utilities/ovs-dpctl.c         |    9 +--
 utilities/ovs-ofctl.8.in      |   54 +++++++++++++++
 utilities/ovs-ofctl.c         |  105 +++++++++++++++++++++++++++--
 39 files changed, 1094 insertions(+), 432 deletions(-)

diff --git a/NEWS b/NEWS
index ff3bc44..a05c197 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,14 @@ Post-v1.2.0
         new NXAST_RESUBMIT_TABLE action can look up in additional
         tables.  Tables 128 and above are reserved for use by the
         switch itself; please use only tables 0 through 127.
+    - Fragment handling extensions:
+      - New OFPC_FRAG_NX_MATCH fragment handling mode, in which L4
+        fields are made available for matching in fragments with
+        offset 0.
+      - New NXM_NX_IP_FRAG match field for matching IP fragments (usable
+        via "ip_frag" in ovs-ofctl).
+      - New ovs-ofctl "get-frags" and "set-frags" commands to get and set
+        fragment handling policy.
     - CAPWAP tunneling now supports an extension to transport a 64-key.  By
       default it remains compatible with the old version and other
       standards-based implementations.
diff --git a/datapath/datapath.c b/datapath/datapath.c
index cd29482..2f8027c 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -68,8 +68,8 @@ EXPORT_SYMBOL(dp_ioctl_hook);
  * etc.) are protected by RTNL.
  *
  * Writes to other state (flow table modifications, set miscellaneous datapath
- * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
- * lock nests inside genl_mutex.
+ * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
+ * genl_mutex.
  *
  * Reads are protected by RCU.
  *
@@ -289,21 +289,14 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	if (!OVS_CB(skb)->flow) {
 		struct sw_flow_key key;
 		int key_len;
-		bool is_frag;
 
 		/* Extract flow from 'skb' into 'key'. */
-		error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
+		error = flow_extract(skb, p->port_no, &key, &key_len);
 		if (unlikely(error)) {
 			kfree_skb(skb);
 			return;
 		}
 
-		if (is_frag && dp->drop_frags) {
-			consume_skb(skb);
-			stats_counter = &stats->n_frags;
-			goto out;
-		}
-
 		/* Look up flow. */
 		flow = flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
 		if (unlikely(!flow)) {
@@ -643,7 +636,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow *flow;
 	struct datapath *dp;
 	struct ethhdr *eth;
-	bool is_frag;
 	int len;
 	int err;
 	int key_len;
@@ -684,7 +676,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(flow))
 		goto err_kfree_skb;
 
-	err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag);
+	err = flow_extract(packet, -1, &flow->key, &key_len);
 	if (err)
 		goto err_flow_put;
 
@@ -753,7 +745,7 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 
 	stats->n_flows = flow_tbl_count(table);
 
-	stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
+	stats->n_hit = stats->n_missed = stats->n_lost = 0;
 	for_each_possible_cpu(i) {
 		const struct dp_stats_percpu *percpu_stats;
 		struct dp_stats_percpu local_stats;
@@ -766,7 +758,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 			local_stats = *percpu_stats;
 		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
 
-		stats->n_frags += local_stats.n_frags;
 		stats->n_hit += local_stats.n_hit;
 		stats->n_missed += local_stats.n_missed;
 		stats->n_lost += local_stats.n_lost;
@@ -1166,7 +1157,6 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
 	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
 #endif
 	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
-	[OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
 };
 
 static struct genl_family dp_datapath_genl_family = {
@@ -1206,9 +1196,6 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 		goto nla_put_failure;
 	get_dp_stats(dp, nla_data(nla));
 
-	NLA_PUT_U32(skb, OVS_DP_ATTR_IPV4_FRAGS,
-		    dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO);
-
 	return genlmsg_end(skb, ovs_header);
 
 nla_put_failure:
@@ -1237,13 +1224,6 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
 
 static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
 {
-	if (a[OVS_DP_ATTR_IPV4_FRAGS]) {
-		u32 frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]);
-
-		if (frags != OVS_DP_FRAG_ZERO && frags != OVS_DP_FRAG_DROP)
-			return -EINVAL;
-	}
-
 	return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
 }
 
@@ -1265,13 +1245,6 @@ static struct datapath *lookup_datapath(struct ovs_header *ovs_header, struct nl
 	return dp ? dp : ERR_PTR(-ENODEV);
 }
 
-/* Called with genl_mutex. */
-static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
-{
-	if (a[OVS_DP_ATTR_IPV4_FRAGS])
-		dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP;
-}
-
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
@@ -1311,15 +1284,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (!dp->table)
 		goto err_free_dp;
 
-	dp->drop_frags = 0;
 	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
 	if (!dp->stats_percpu) {
 		err = -ENOMEM;
 		goto err_destroy_table;
 	}
 
-	change_datapath(dp, a);
-
 	/* Set up our datapath device. */
 	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
 	parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1432,8 +1402,6 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(dp))
 		return PTR_ERR(dp);
 
-	change_datapath(dp, info->attrs);
-
 	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
diff --git a/datapath/datapath.h b/datapath/datapath.h
index b93665c..4964a51 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -33,7 +33,6 @@ struct vport;
 /**
  * struct dp_stats_percpu - per-cpu packet processing statistics for a given
  * datapath.
- * @n_frags: Number of IP fragments processed by datapath.
  * @n_hit: Number of received packets for which a matching flow was found in
  * the flow table.
  * @n_miss: Number of received packets that had no matching flow in the flow
@@ -44,7 +43,6 @@ struct vport;
  * one of the datapath's queues).
  */
 struct dp_stats_percpu {
-	u64 n_frags;
 	u64 n_hit;
 	u64 n_missed;
 	u64 n_lost;
@@ -56,7 +54,6 @@ struct dp_stats_percpu {
  * @rcu: RCU callback head for deferred destruction.
  * @list_node: Element in global 'dps' list.
  * @ifobj: Represents /sys/class/net/<devname>/brif.  Protected by RTNL.
- * @drop_frags: Drop all IP fragments if nonzero.
  * @n_flows: Number of flows currently in flow table.
  * @table: Current flow table.  Protected by genl_lock and RCU.
  * @ports: Map from port number to &struct vport.  %OVSP_LOCAL port
@@ -73,8 +70,6 @@ struct datapath {
 	struct list_head list_node;
 	struct kobject ifobj;
 
-	int drop_frags;
-
 	/* Flow table. */
 	struct flow_table __rcu *table;
 
diff --git a/datapath/flow.c b/datapath/flow.c
index 7322295..7c602bf 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -119,6 +119,67 @@ u64 flow_used_time(unsigned long flow_jiffies)
 	offsetof(struct sw_flow_key, field) +	\
 	FIELD_SIZEOF(struct sw_flow_key, field)
 
+/**
+ * skip_exthdr - skip any IPv6 extension headers
+ * @skb: skbuff to parse
+ * @start: offset of first extension header
+ * @nexthdrp: Initially, points to the type of the extension header at @start.
+ * This function updates *@nexthdrp to the type of the header at the final
+ * offset.
+ * @tos_frag: Points to the @tos_frag member in a &struct sw_flow_key.  If a
+ * fragment header is found, this function stores its %OVS_FRAG_TYPE_* there.
+ *
+ * This is based on ipv6_skip_exthdr() but adds the updates to *@tos_frag.
+ *
+ * When there is more than one fragment header, this version reports whether
+ * the final fragment header that it examines is a first fragment.
+ *
+ * Returns the final payload offset, or -1 on error.
+ */
+static int skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
+		       u8 *tos_frag)
+{
+	u8 nexthdr = *nexthdrp;
+
+	while (ipv6_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		int hdrlen;
+
+		if (nexthdr == NEXTHDR_NONE)
+			return -1;	/* "No next header": nothing follows. */
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return -1;	/* Header is not within the packet. */
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			__be16 _frag_off, *fp;
+			fp = skb_header_pointer(skb,
+						start+offsetof(struct frag_hdr,
+							       frag_off),
+						sizeof(_frag_off),
+						&_frag_off);
+			if (fp == NULL)
+				return -1;
+
+			*tos_frag &= ~OVS_FRAG_TYPE_MASK;
+			if (ntohs(*fp) & ~0x7) {	/* Nonzero fragment offset. */
+				*tos_frag |= OVS_FRAG_TYPE_LATER;
+				break;	/* *nexthdrp stays NEXTHDR_FRAGMENT. */
+			}
+			*tos_frag |= OVS_FRAG_TYPE_FIRST;
+			hdrlen = 8;	/* Fragment header has a fixed size. */
+		} else if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hp->hdrlen+2)<<2;	/* AH counts 4-byte units. */
+		else
+			hdrlen = ipv6_optlen(hp);
+
+		nexthdr = hp->nexthdr;
+		start += hdrlen;
+	}
+
+	*nexthdrp = nexthdr;
+	return start;
+}
+
 static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
 			 int *key_lenp)
 {
@@ -140,11 +201,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
 	payload_ofs = (u8 *)(nh + 1) - skb->data;
 
 	key->ip.proto = NEXTHDR_NONE;
-	key->ip.tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
+	key->ip.tos_frag = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
 	ipv6_addr_copy(&key->ipv6.addr.src, &nh->saddr);
 	ipv6_addr_copy(&key->ipv6.addr.dst, &nh->daddr);
 
-	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr);
+	payload_ofs = skip_exthdr(skb, payload_ofs, &nexthdr, &key->ip.tos_frag);
 	if (unlikely(payload_ofs < 0))
 		return -EINVAL;
 
@@ -552,8 +613,6 @@ out:
  * @in_port: port number on which @skb was received.
  * @key: output flow key
  * @key_lenp: length of output flow key
- * @is_frag: set to 1 if @skb contains an IPv4 fragment, or to 0 if @skb does
- * not contain an IPv4 packet or if it is not a fragment.
  *
  * The caller must ensure that skb->len >= ETH_HLEN.
  *
@@ -572,7 +631,7 @@ out:
  *      For other key->dl_type values it is left untouched.
  */
 int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
-		 int *key_lenp, bool *is_frag)
+		 int *key_lenp)
 {
 	int error = 0;
 	int key_len = SW_FLOW_KEY_OFFSET(eth);
@@ -581,7 +640,6 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	memset(key, 0, sizeof(*key));
 	key->eth.tun_id = OVS_CB(skb)->tun_id;
 	key->eth.in_port = in_port;
-	*is_frag = false;
 
 	skb_reset_mac_header(skb);
 
@@ -610,6 +668,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	/* Network layer. */
 	if (key->eth.type == htons(ETH_P_IP)) {
 		struct iphdr *nh;
+		__be16 offset;
 
 		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
 
@@ -625,31 +684,35 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 		nh = ip_hdr(skb);
 		key->ipv4.addr.src = nh->saddr;
 		key->ipv4.addr.dst = nh->daddr;
-		key->ip.tos = nh->tos & ~INET_ECN_MASK;
+
 		key->ip.proto = nh->protocol;
+		key->ip.tos_frag = nh->tos & ~INET_ECN_MASK;
 
-		/* Transport layer. */
-		if ((nh->frag_off & htons(IP_MF | IP_OFFSET)) ||
-		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
-			*is_frag = true;
+		offset = nh->frag_off & htons(IP_OFFSET);
+		if (offset)
+			key->ip.tos_frag |= OVS_FRAG_TYPE_LATER;
+		else if (nh->frag_off & htons(IP_MF) ||
+			 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST;
 
+		/* Transport layer. */
 		if (key->ip.proto == IPPROTO_TCP) {
 			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-			if (!*is_frag && tcphdr_ok(skb)) {
+			if (!offset && tcphdr_ok(skb)) {
 				struct tcphdr *tcp = tcp_hdr(skb);
 				key->ipv4.tp.src = tcp->source;
 				key->ipv4.tp.dst = tcp->dest;
 			}
 		} else if (key->ip.proto == IPPROTO_UDP) {
 			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-			if (!*is_frag && udphdr_ok(skb)) {
+			if (!offset && udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
 				key->ipv4.tp.src = udp->source;
 				key->ipv4.tp.dst = udp->dest;
 			}
 		} else if (key->ip.proto == IPPROTO_ICMP) {
 			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
-			if (!*is_frag && icmphdr_ok(skb)) {
+			if (!offset && icmphdr_ok(skb)) {
 				struct icmphdr *icmp = icmp_hdr(skb);
 				/* The ICMP type and code fields use the 16-bit
 				 * transport port fields, so we need to store them
@@ -694,6 +757,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 			goto out;
 		}
 
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST;
+
 		/* Transport layer. */
 		if (key->ip.proto == NEXTHDR_TCP) {
 			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
@@ -768,6 +834,15 @@ void flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 	}
 }
 
+static int parse_tos_frag(struct sw_flow_key *swkey, u8 tos, u8 frag)
+{
+	if (tos & INET_ECN_MASK || frag > OVS_FRAG_TYPE_MAX)
+		return -EINVAL;	/* ECN bits set in tos, or unknown frag type. */
+
+	swkey->ip.tos_frag = tos | frag;	/* frag goes in the low 2 bits. */
+	return 0;
+}
+
 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
 static const u32 key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_TUN_ID] = 8,
@@ -797,11 +872,15 @@ static const u32 key_lens[OVS_KEY_ATTR_MAX + 1] = {
  *
  * [tun_id] [in_port] ethernet [8021q] [ethertype \
  *              [IPv4 [TCP|UDP|ICMP] | IPv6 [TCP|UDP|ICMPv6 [ND]] | ARP]]
+ *
+ * except that IPv4 or IPv6 terminates the sequence if its @ipv4_frag or
+ * @ipv6_frag member, respectively, equals %OVS_FRAG_TYPE_LATER.
  */
 int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		      const struct nlattr *attr)
 {
 	int error = 0;
+	enum ovs_frag_type frag_type;
 	const struct nlattr *nla;
 	u16 prev_type;
 	int rem;
@@ -874,11 +953,11 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 				goto invalid;
 			ipv4_key = nla_data(nla);
 			swkey->ip.proto = ipv4_key->ipv4_proto;
-			swkey->ip.tos = ipv4_key->ipv4_tos;
+			if (parse_tos_frag(swkey, ipv4_key->ipv4_tos,
+					   ipv4_key->ipv4_frag))
+				goto invalid;
 			swkey->ipv4.addr.src = ipv4_key->ipv4_src;
 			swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
-			if (swkey->ip.tos & INET_ECN_MASK)
-				goto invalid;
 			break;
 
 		case TRANSITION(OVS_KEY_ATTR_ETHERTYPE, OVS_KEY_ATTR_IPV6):
@@ -887,13 +966,13 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 				goto invalid;
 			ipv6_key = nla_data(nla);
 			swkey->ip.proto = ipv6_key->ipv6_proto;
-			swkey->ip.tos = ipv6_key->ipv6_tos;
+			if (parse_tos_frag(swkey, ipv6_key->ipv6_tos,
+					   ipv6_key->ipv6_frag))
+				goto invalid;
 			memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
 					sizeof(swkey->ipv6.addr.src));
 			memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
 					sizeof(swkey->ipv6.addr.dst));
-			if (swkey->ip.tos & INET_ECN_MASK)
-				goto invalid;
 			break;
 
 		case TRANSITION(OVS_KEY_ATTR_IPV4, OVS_KEY_ATTR_TCP):
@@ -985,6 +1064,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 	if (rem)
 		goto invalid;
 
+	frag_type = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
 	switch (prev_type) {
 	case OVS_KEY_ATTR_UNSPEC:
 		goto invalid;
@@ -1004,6 +1084,8 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		goto ok;
 
 	case OVS_KEY_ATTR_IPV4:
+		if (frag_type == OVS_FRAG_TYPE_LATER)
+			goto ok;
 		if (swkey->ip.proto == IPPROTO_TCP ||
 		    swkey->ip.proto == IPPROTO_UDP ||
 		    swkey->ip.proto == IPPROTO_ICMP)
@@ -1011,6 +1093,8 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 		goto ok;
 
 	case OVS_KEY_ATTR_IPV6:
+		if (frag_type == OVS_FRAG_TYPE_LATER)
+			goto ok;
 		if (swkey->ip.proto == IPPROTO_TCP ||
 		    swkey->ip.proto == IPPROTO_UDP ||
 		    swkey->ip.proto == IPPROTO_ICMPV6)
@@ -1019,15 +1103,20 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 
 	case OVS_KEY_ATTR_ICMPV6:
 		if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
-		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
+		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT) ||
+		    frag_type == OVS_FRAG_TYPE_LATER)
 			goto invalid;
 		goto ok;
 
 	case OVS_KEY_ATTR_TCP:
 	case OVS_KEY_ATTR_UDP:
 	case OVS_KEY_ATTR_ICMP:
-	case OVS_KEY_ATTR_ARP:
 	case OVS_KEY_ATTR_ND:
+		if (frag_type == OVS_FRAG_TYPE_LATER)
+			goto invalid;
+		goto ok;
+
+	case OVS_KEY_ATTR_ARP:
 		goto ok;
 
 	default:
@@ -1142,7 +1231,8 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		ipv4_key->ipv4_src = swkey->ipv4.addr.src;
 		ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
 		ipv4_key->ipv4_proto = swkey->ip.proto;
-		ipv4_key->ipv4_tos = swkey->ip.tos;
+		ipv4_key->ipv4_tos = swkey->ip.tos_frag & ~INET_ECN_MASK;
+		ipv4_key->ipv4_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
 	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
 		struct ovs_key_ipv6 *ipv6_key;
 
@@ -1156,7 +1246,8 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
 				sizeof(ipv6_key->ipv6_dst));
 		ipv6_key->ipv6_proto = swkey->ip.proto;
-		ipv6_key->ipv6_tos = swkey->ip.tos;
+		ipv6_key->ipv6_tos = swkey->ip.tos_frag & ~INET_ECN_MASK;
+		ipv6_key->ipv6_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK;
 	} else if (swkey->eth.type == htons(ETH_P_ARP)) {
 		struct ovs_key_arp *arp_key;
 
@@ -1172,8 +1263,9 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
 		memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
 	}
 
-	if (swkey->eth.type == htons(ETH_P_IP) ||
-	    swkey->eth.type == htons(ETH_P_IPV6)) {
+	if ((swkey->eth.type == htons(ETH_P_IP) ||
+	     swkey->eth.type == htons(ETH_P_IPV6)) &&
+	    (swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK) != OVS_FRAG_TYPE_LATER) {
 
 		if (swkey->ip.proto == IPPROTO_TCP) {
 			struct ovs_key_tcp *tcp_key;
diff --git a/datapath/flow.h b/datapath/flow.h
index ade8ac8..af68889 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -20,6 +20,7 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/flex_array.h>
+#include <net/inet_ecn.h>
 
 struct sk_buff;
 
@@ -29,6 +30,10 @@ struct sw_flow_actions {
 	struct nlattr actions[];
 };
 
+/* Mask for the OVS_FRAG_TYPE_* value in the low 2 bits of ip.tos_frag in
+ * struct sw_flow_key. */
+#define OVS_FRAG_TYPE_MASK INET_ECN_MASK
+
 struct sw_flow_key {
 	struct {
 		__be64 tun_id;		/* Encapsulating tunnel ID. */
@@ -40,7 +45,8 @@ struct sw_flow_key {
 	} eth;
 	struct {
 		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
-		u8     tos;		/* IP ToS (DSCP field, 6 bits). */
+		u8     tos_frag;	/* IP ToS DSCP in high 6 bits,
+					 * OVS_FRAG_TYPE_* in low 2 bits. */
 	} ip;
 	union {
 		struct {
@@ -123,7 +129,7 @@ void flow_hold(struct sw_flow *);
 void flow_put(struct sw_flow *);
 
 int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
-		 int *key_lenp, bool *is_frag);
+		 int *key_lenp);
 void flow_used(struct sw_flow *, struct sk_buff *);
 u64 flow_used_time(unsigned long flow_jiffies);
 
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 8edff06..e3e03a7 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -869,7 +869,6 @@ static struct tnl_cache *build_cache(struct vport *vport,
 		struct sw_flow_key flow_key;
 		struct vport *dst_vport;
 		struct sk_buff *skb;
-		bool is_frag;
 		int err;
 		int flow_key_len;
 		struct sw_flow *flow;
@@ -886,10 +885,10 @@ static struct tnl_cache *build_cache(struct vport *vport,
 		memcpy(skb->data, get_cached_header(cache), cache->len);
 
 		err = flow_extract(skb, dst_vport->port_no, &flow_key,
-				   &flow_key_len, &is_frag);
+				   &flow_key_len);
 
 		consume_skb(skb);
-		if (err || is_frag)
+		if (err)
 			goto done;
 
 		flow = flow_tbl_lookup(rcu_dereference(dst_vport->dp->table),
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index c077f62..5f2385c 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -80,9 +80,6 @@ struct ovs_header {
  * not be sent.
  * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
  * datapath.  Always present in notifications.
- * @OVS_DP_ATTR_IPV4_FRAGS: One of %OVS_DP_FRAG_*.  Always present in
- * notifications.  May be included in %OVS_DP_NEW or %OVS_DP_SET requests to
- * change the fragment handling policy.
  *
  * These attributes follow the &struct ovs_header within the Generic Netlink
  * payload for %OVS_DP_* commands.
@@ -92,27 +89,12 @@ enum ovs_datapath_attr {
 	OVS_DP_ATTR_NAME,       /* name of dp_ifindex netdev */
 	OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
 	OVS_DP_ATTR_STATS,      /* struct ovs_dp_stats */
-	OVS_DP_ATTR_IPV4_FRAGS,	/* 32-bit enum ovs_datapath_frag */
 	__OVS_DP_ATTR_MAX
 };
 
 #define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
 
-/**
- * enum ovs_datapath_frag - policy for handling received IPv4 fragments.
- * @OVS_DP_FRAG_ZERO: Treat IP fragments as IP protocol 0 and transport ports
- * zero.
- * @OVS_DP_FRAG_DROP: Drop IP fragments.  Do not pass them through the flow
- * table or up to userspace.
- */
-enum ovs_datapath_frag {
-	OVS_DP_FRAG_UNSPEC,
-	OVS_DP_FRAG_ZERO,	/* Treat IP fragments as transport port 0. */
-	OVS_DP_FRAG_DROP	/* Drop IP fragments. */
-};
-
 struct ovs_dp_stats {
-    __u64 n_frags;           /* Number of dropped IP fragments. */
     __u64 n_hit;             /* Number of flow table matches. */
     __u64 n_missed;          /* Number of flow table misses. */
     __u64 n_lost;            /* Number of misses not sent to userspace. */
@@ -290,6 +272,24 @@ enum ovs_key_attr {
 
 #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
 
+/**
+ * enum ovs_frag_type - IPv4 and IPv6 fragment type
+ * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
+ * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
+ * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
+ *
+ * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag in &struct
+ * ovs_key_ipv6.
+ */
+enum ovs_frag_type {
+	OVS_FRAG_TYPE_NONE,
+	OVS_FRAG_TYPE_FIRST,
+	OVS_FRAG_TYPE_LATER,
+	__OVS_FRAG_TYPE_MAX
+};
+
+#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
+
 struct ovs_key_ethernet {
 	__u8	 eth_src[6];
 	__u8	 eth_dst[6];
@@ -305,6 +305,7 @@ struct ovs_key_ipv4 {
 	__be32 ipv4_dst;
 	__u8   ipv4_proto;
 	__u8   ipv4_tos;
+	__u8   ipv4_frag;	/* One of OVS_FRAG_TYPE_*. */
 };
 
 struct ovs_key_ipv6 {
@@ -312,6 +313,7 @@ struct ovs_key_ipv6 {
 	__be32 ipv6_dst[4];
 	__u8   ipv6_proto;
 	__u8   ipv6_tos;
+	__u8   ipv6_frag;	/* One of OVS_FRAG_TYPE_*. */
 };
 
 struct ovs_key_tcp {
diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index a6d2db4..aeb1a31 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -1531,6 +1531,55 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24);
  * Masking: Not maskable. */
 #define NXM_NX_ND_TLL      NXM_HEADER  (0x0001, 25, 6)
 
+/* IP fragment information.
+ *
+ * Prereqs:
+ *   NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd.
+ *
+ * Format: 8-bit value with one of the values 0, 1, or 3, as described below.
+ *
+ * Masking: Fully maskable.
+ *
+ * This field has three possible values:
+ *
+ *   - A packet that is not an IP fragment has value 0.
+ *
+ *   - A packet that is an IP fragment with offset 0 (the first fragment) has
+ *     bit 0 set and thus value 1.
+ *
+ *   - A packet that is an IP fragment with nonzero offset has bits 0 and 1 set
+ *     and thus value 3.
+ *
+ * NX_IP_FRAG_ANY and NX_IP_FRAG_LATER are declared to symbolically represent
+ * the meanings of bits 0 and 1.
+ *
+ * The switch may reject matches against values that can never appear.
+ *
+ * It is important to understand how this field interacts with the OpenFlow IP
+ * fragment handling mode:
+ *
+ *   - In OFPC_FRAG_DROP mode, the OpenFlow switch drops all IP fragments
+ *     before they reach the flow table, so every packet that is available for
+ *     matching will have value 0 in this field.
+ *
+ *   - Open vSwitch does not implement OFPC_FRAG_REASM mode, but if it did then
+ *     IP fragments would be reassembled before they reached the flow table and
+ *     again every packet available for matching would always have value 0.
+ *
+ *   - In OFPC_FRAG_NORMAL mode, all three values are possible, but OpenFlow
+ *     1.0 says that fragments' transport ports are always 0, even for the
+ *     first fragment, so this does not provide much extra information.
+ *
+ *   - In OFPC_FRAG_NX_MATCH mode, all three values are possible.  For
+ *     fragments with offset 0, Open vSwitch makes L4 header information
+ *     available.
+ */
+#define NXM_NX_IP_FRAG     NXM_HEADER  (0x0001, 26, 1)
+#define NXM_NX_IP_FRAG_W   NXM_HEADER_W(0x0001, 26, 1)
+
+/* Bits in the value of NXM_NX_IP_FRAG. */
+#define NX_IP_FRAG_ANY   (1 << 0) /* Is this a fragment? */
+#define NX_IP_FRAG_LATER (1 << 1) /* Is this a fragment with nonzero offset? */
 
 /* ## --------------------- ## */
 /* ## Requests and replies. ## */
diff --git a/include/openflow/openflow.h b/include/openflow/openflow.h
index fd8fbeb..cee62e8 100644
--- a/include/openflow/openflow.h
+++ b/include/openflow/openflow.h
@@ -134,6 +134,7 @@ enum ofp_config_flags {
     OFPC_FRAG_NORMAL   = 0,  /* No special handling for fragments. */
     OFPC_FRAG_DROP     = 1,  /* Drop fragments. */
     OFPC_FRAG_REASM    = 2,  /* Reassemble (only if OFPC_IP_REASM set). */
+    OFPC_FRAG_NX_MATCH = 3,  /* Make first fragments available for matching. */
     OFPC_FRAG_MASK     = 3
 };
 
diff --git a/lib/classifier.c b/lib/classifier.c
index 9f4c42b..0335f13 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -319,8 +319,26 @@ cls_rule_set_nw_dst_masked(struct cls_rule *rule, ovs_be32 ip, ovs_be32 mask)
 void
 cls_rule_set_nw_tos(struct cls_rule *rule, uint8_t nw_tos)
 {
-    rule->wc.wildcards &= ~FWW_NW_TOS;
-    rule->flow.nw_tos = nw_tos & IP_DSCP_MASK;
+    rule->wc.tos_frag_mask |= IP_DSCP_MASK;
+    rule->flow.tos_frag &= ~IP_DSCP_MASK;
+    rule->flow.tos_frag |= nw_tos & IP_DSCP_MASK;
+}
+
+void
+cls_rule_set_frag(struct cls_rule *rule, uint8_t frag)
+{
+    rule->wc.tos_frag_mask |= FLOW_FRAG_MASK;     /* Match all frag bits. */
+    rule->flow.tos_frag &= ~FLOW_FRAG_MASK;       /* Keep the other bits. */
+    rule->flow.tos_frag |= frag & FLOW_FRAG_MASK;
+}
+
+void
+cls_rule_set_frag_masked(struct cls_rule *rule, uint8_t frag, uint8_t mask)
+{
+    mask &= FLOW_FRAG_MASK;     /* Ignore mask bits outside the frag bits. */
+    frag &= mask;               /* Clear value bits that are wildcarded. */
+    rule->wc.tos_frag_mask = (rule->wc.tos_frag_mask & ~FLOW_FRAG_MASK) | mask;
+    rule->flow.tos_frag = (rule->flow.tos_frag & ~FLOW_FRAG_MASK) | frag;
+}
 
 void
@@ -450,7 +468,7 @@ cls_rule_format(const struct cls_rule *rule, struct ds *s)
 
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
 
     if (rule->priority != OFP_DEFAULT_PRIORITY) {
         ds_put_format(s, "priority=%d,", rule->priority);
@@ -592,8 +610,26 @@ cls_rule_format(const struct cls_rule *rule, struct ds *s)
                     ETH_ADDR_ARGS(f->arp_tha));
         }
     }
-    if (!(w & FWW_NW_TOS)) {
-        ds_put_format(s, "nw_tos=%"PRIu8",", f->nw_tos);
+    if (wc->tos_frag_mask & IP_DSCP_MASK) {
+        ds_put_format(s, "nw_tos=%"PRIu8",", f->tos_frag & IP_DSCP_MASK);
+    }
+    switch (wc->tos_frag_mask & FLOW_FRAG_MASK) {
+    case FLOW_FRAG_ANY | FLOW_FRAG_LATER: /* Exact match on both bits. */
+        ds_put_format(s, "frag=%s,",
+                      f->tos_frag & FLOW_FRAG_ANY
+                      ? (f->tos_frag & FLOW_FRAG_LATER ? "later" : "first")
+                      : (f->tos_frag & FLOW_FRAG_LATER ? "<error>" : "no"));
+        break;
+    /* Only the "is this a fragment?" bit is matched. */
+    case FLOW_FRAG_ANY:
+        ds_put_format(s, "frag=%s,",
+                      f->tos_frag & FLOW_FRAG_ANY ? "yes" : "no");
+        break;
+    /* Only the "fragment with nonzero offset?" bit is matched. */
+    case FLOW_FRAG_LATER:
+        ds_put_format(s, "frag=%s,",
+                      f->tos_frag & FLOW_FRAG_LATER ? "later" : "not_later");
+        break;
     }
     if (f->nw_proto == IPPROTO_ICMP) {
         if (!(w & FWW_TP_SRC)) {
@@ -1123,7 +1159,7 @@ flow_equal_except(const struct flow *a, const struct flow *b,
     const flow_wildcards_t wc = wildcards->wildcards;
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
 
     for (i = 0; i < FLOW_N_REGS; i++) {
         if ((a->regs[i] ^ b->regs[i]) & wildcards->reg_masks[i]) {
@@ -1150,7 +1186,7 @@ flow_equal_except(const struct flow *a, const struct flow *b,
             && (wc & FWW_ETH_MCAST
                 || !((a->dl_dst[0] ^ b->dl_dst[0]) & 0x01))
             && (wc & FWW_NW_PROTO || a->nw_proto == b->nw_proto)
-            && (wc & FWW_NW_TOS || a->nw_tos == b->nw_tos)
+            && !((a->tos_frag ^ b->tos_frag) & wildcards->tos_frag_mask)
             && (wc & FWW_ARP_SHA || eth_addr_equals(a->arp_sha, b->arp_sha))
             && (wc & FWW_ARP_THA || eth_addr_equals(a->arp_tha, b->arp_tha))
             && ipv6_equal_except(&a->ipv6_src, &b->ipv6_src,
diff --git a/lib/classifier.h b/lib/classifier.h
index db09022..d5c19f0 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -117,6 +117,8 @@ bool cls_rule_set_nw_src_masked(struct cls_rule *, ovs_be32 ip, ovs_be32 mask);
 void cls_rule_set_nw_dst(struct cls_rule *, ovs_be32);
 bool cls_rule_set_nw_dst_masked(struct cls_rule *, ovs_be32 ip, ovs_be32 mask);
 void cls_rule_set_nw_tos(struct cls_rule *, uint8_t);
+void cls_rule_set_frag(struct cls_rule *, uint8_t frag);
+void cls_rule_set_frag_masked(struct cls_rule *, uint8_t frag, uint8_t mask);
 void cls_rule_set_icmp_type(struct cls_rule *, uint8_t);
 void cls_rule_set_icmp_code(struct cls_rule *, uint8_t);
 void cls_rule_set_arp_sha(struct cls_rule *, const uint8_t[6]);
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index fa6a05e..2466f91 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -79,7 +79,6 @@ struct dpif_linux_dp {
     const char *name;                  /* OVS_DP_ATTR_NAME. */
     const uint32_t *upcall_pid;        /* OVS_DP_UPCALL_PID. */
     struct ovs_dp_stats stats;         /* OVS_DP_ATTR_STATS. */
-    enum ovs_datapath_frag ipv4_frags; /* OVS_DP_ATTR_IPV4_FRAGS. */
 };
 
 static void dpif_linux_dp_init(struct dpif_linux_dp *);
@@ -347,7 +346,6 @@ dpif_linux_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
 
     error = dpif_linux_dp_get(dpif_, &dp, &buf);
     if (!error) {
-        stats->n_frags  = dp.stats.n_frags;
         stats->n_hit    = dp.stats.n_hit;
         stats->n_missed = dp.stats.n_missed;
         stats->n_lost   = dp.stats.n_lost;
@@ -358,34 +356,6 @@ dpif_linux_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
 }
 
 static int
-dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
-{
-    struct dpif_linux_dp dp;
-    struct ofpbuf *buf;
-    int error;
-
-    error = dpif_linux_dp_get(dpif_, &dp, &buf);
-    if (!error) {
-        *drop_fragsp = dp.ipv4_frags == OVS_DP_FRAG_DROP;
-        ofpbuf_delete(buf);
-    }
-    return error;
-}
-
-static int
-dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags)
-{
-    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-    struct dpif_linux_dp dp;
-
-    dpif_linux_dp_init(&dp);
-    dp.cmd = OVS_DP_CMD_SET;
-    dp.dp_ifindex = dpif->dp_ifindex;
-    dp.ipv4_frags = drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO;
-    return dpif_linux_dp_transact(&dp, NULL, NULL);
-}
-
-static int
 dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
                     uint16_t *port_nop)
 {
@@ -1206,8 +1176,6 @@ const struct dpif_class dpif_linux_class = {
     dpif_linux_run,
     dpif_linux_wait,
     dpif_linux_get_stats,
-    dpif_linux_get_drop_frags,
-    dpif_linux_set_drop_frags,
     dpif_linux_port_add,
     dpif_linux_port_del,
     dpif_linux_port_query_by_number,
@@ -1540,7 +1508,6 @@ dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf)
                                 .min_len = sizeof(struct ovs_dp_stats),
                                 .max_len = sizeof(struct ovs_dp_stats),
                                 .optional = true },
-        [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true },
     };
 
     struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
@@ -1571,9 +1538,6 @@ dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf)
         memcpy(&dp->stats, nl_attr_get(a[OVS_DP_ATTR_STATS]),
                sizeof dp->stats);
     }
-    if (a[OVS_DP_ATTR_IPV4_FRAGS]) {
-        dp->ipv4_frags = nl_attr_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]);
-    }
 
     return 0;
 }
@@ -1599,10 +1563,6 @@ dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf)
     }
 
     /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
-
-    if (dp->ipv4_frags) {
-        nl_msg_put_u32(buf, OVS_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags);
-    }
 }
 
 /* Clears 'dp' to "empty" values. */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f78cda2..64f445e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -81,12 +81,10 @@ struct dp_netdev {
     int open_cnt;
     bool destroyed;
 
-    bool drop_frags;            /* Drop all IP fragments, if true. */
     struct dp_netdev_queue queues[N_QUEUES];
     struct hmap flow_table;     /* Flow table. */
 
     /* Statistics. */
-    long long int n_frags;      /* Number of dropped IP fragments. */
     long long int n_hit;        /* Number of flow table matches. */
     long long int n_missed;     /* Number of flow table misses. */
     long long int n_lost;       /* Number of misses not passed to client. */
@@ -198,7 +196,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
     dp->class = class;
     dp->name = xstrdup(name);
     dp->open_cnt = 0;
-    dp->drop_frags = false;
     for (i = 0; i < N_QUEUES; i++) {
         dp->queues[i].head = dp->queues[i].tail = 0;
     }
@@ -302,7 +299,6 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     stats->n_flows = hmap_count(&dp->flow_table);
-    stats->n_frags = dp->n_frags;
     stats->n_hit = dp->n_hit;
     stats->n_missed = dp->n_missed;
     stats->n_lost = dp->n_lost;
@@ -310,22 +306,6 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
 }
 
 static int
-dpif_netdev_get_drop_frags(const struct dpif *dpif, bool *drop_fragsp)
-{
-    struct dp_netdev *dp = get_dp_netdev(dpif);
-    *drop_fragsp = dp->drop_frags;
-    return 0;
-}
-
-static int
-dpif_netdev_set_drop_frags(struct dpif *dpif, bool drop_frags)
-{
-    struct dp_netdev *dp = get_dp_netdev(dpif);
-    dp->drop_frags = drop_frags;
-    return 0;
-}
-
-static int
 do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
             uint16_t port_no)
 {
@@ -1001,11 +981,7 @@ dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port,
     if (packet->size < ETH_HEADER_LEN) {
         return;
     }
-    if (flow_extract(packet, 0, port->port_no, &key) && dp->drop_frags) {
-        dp->n_frags++;
-        return;
-    }
-
+    flow_extract(packet, 0, port->port_no, &key);
     flow = dp_netdev_lookup_flow(dp, &key);
     if (flow) {
         dp_netdev_flow_used(flow, &key, packet);
@@ -1335,8 +1311,6 @@ const struct dpif_class dpif_netdev_class = {
     dpif_netdev_run,
     dpif_netdev_wait,
     dpif_netdev_get_stats,
-    dpif_netdev_get_drop_frags,
-    dpif_netdev_set_drop_frags,
     dpif_netdev_port_add,
     dpif_netdev_port_del,
     dpif_netdev_port_query_by_number,
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index ec662e7..83d56d6 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -110,16 +110,6 @@ struct dpif_class {
     /* Retrieves statistics for 'dpif' into 'stats'. */
     int (*get_stats)(const struct dpif *dpif, struct dpif_dp_stats *stats);
 
-    /* Retrieves 'dpif''s current treatment of IP fragments into '*drop_frags':
-     * true indicates that fragments are dropped, false indicates that
-     * fragments are treated in the same way as other IP packets (except that
-     * the L4 header cannot be read). */
-    int (*get_drop_frags)(const struct dpif *dpif, bool *drop_frags);
-
-    /* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose
-     * meaning is the same as for the get_drop_frags member function. */
-    int (*set_drop_frags)(struct dpif *dpif, bool drop_frags);
-
     /* Adds 'netdev' as a new port in 'dpif'.  If successful, sets '*port_no'
      * to the new port's port number. */
     int (*port_add)(struct dpif *dpif, struct netdev *netdev,
diff --git a/lib/dpif.c b/lib/dpif.c
index a95985a..68a95f6 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -388,33 +388,6 @@ dpif_get_dp_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
     return error;
 }
 
-/* Retrieves the current IP fragment handling policy for 'dpif' into
- * '*drop_frags': true indicates that fragments are dropped, false indicates
- * that fragments are treated in the same way as other IP packets (except that
- * the L4 header cannot be read).  Returns 0 if successful, otherwise a
- * positive errno value. */
-int
-dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags)
-{
-    int error = dpif->dpif_class->get_drop_frags(dpif, drop_frags);
-    if (error) {
-        *drop_frags = false;
-    }
-    log_operation(dpif, "get_drop_frags", error);
-    return error;
-}
-
-/* Changes 'dpif''s treatment of IP fragments to 'drop_frags', whose meaning is
- * the same as for the get_drop_frags member function.  Returns 0 if
- * successful, otherwise a positive errno value. */
-int
-dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
-{
-    int error = dpif->dpif_class->set_drop_frags(dpif, drop_frags);
-    log_operation(dpif, "set_drop_frags", error);
-    return error;
-}
-
 /* Attempts to add 'netdev' as a port on 'dpif'.  If successful, returns 0 and
  * sets '*port_nop' to the new port's port number (if 'port_nop' is non-null).
  * On failure, returns a positive errno value and sets '*port_nop' to
diff --git a/lib/dpif.h b/lib/dpif.h
index 404c05a..223f990 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -61,7 +61,6 @@ int dpif_delete(struct dpif *);
 
 /* Statisticss for a dpif as a whole. */
 struct dpif_dp_stats {
-    uint64_t n_frags;           /* Number of dropped IP fragments. */
     uint64_t n_hit;             /* Number of flow table matches. */
     uint64_t n_missed;          /* Number of flow table misses. */
     uint64_t n_lost;            /* Number of misses not sent to userspace. */
@@ -69,8 +68,6 @@ struct dpif_dp_stats {
 };
 int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);
 
-int dpif_get_drop_frags(const struct dpif *, bool *drop_frags);
-int dpif_set_drop_frags(struct dpif *, bool drop_frags);
 
 /* Port operations. */
 
diff --git a/lib/flow.c b/lib/flow.c
index ded98b2..06cc822 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -148,7 +148,7 @@ parse_ipv6(struct ofpbuf *packet, struct flow *flow)
     flow->ipv6_dst = nh->ip6_dst;
 
     tc_flow = get_unaligned_be32(&nh->ip6_flow);
-    flow->nw_tos = (ntohl(tc_flow) >> 4) & IP_DSCP_MASK;
+    flow->tos_frag = (ntohl(tc_flow) >> 4) & IP_DSCP_MASK;
     flow->nw_proto = IPPROTO_NONE;
 
     while (1) {
@@ -201,7 +201,10 @@ parse_ipv6(struct ofpbuf *packet, struct flow *flow)
             }
 
             /* We only process the first fragment. */
+            flow->tos_frag &= ~FLOW_FRAG_MASK;
+            flow->tos_frag |= FLOW_FRAG_ANY;
             if ((frag_hdr->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) {
+                flow->tos_frag |= FLOW_FRAG_LATER;
                 nexthdr = IPPROTO_FRAGMENT;
                 break;
             }
@@ -320,13 +323,12 @@ invalid:
  *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
  *      present and has a correct length, and otherwise NULL.
  */
-int
+void
 flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port,
              struct flow *flow)
 {
     struct ofpbuf b = *packet;
     struct eth_header *eth;
-    int retval = 0;
 
     COVERAGE_INC(flow_extract);
 
@@ -340,7 +342,7 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port,
     packet->l7 = NULL;
 
     if (b.size < sizeof *eth) {
-        return 0;
+        return;
     }
 
     /* Link layer. */
@@ -360,12 +362,21 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port,
     if (flow->dl_type == htons(ETH_TYPE_IP)) {
         const struct ip_header *nh = pull_ip(&b);
         if (nh) {
+            packet->l4 = b.data;
+
             flow->nw_src = get_unaligned_be32(&nh->ip_src);
             flow->nw_dst = get_unaligned_be32(&nh->ip_dst);
-            flow->nw_tos = nh->ip_tos & IP_DSCP_MASK;
             flow->nw_proto = nh->ip_proto;
-            packet->l4 = b.data;
-            if (!IP_IS_FRAGMENT(nh->ip_frag_off)) {
+
+            flow->tos_frag = nh->ip_tos & IP_DSCP_MASK;
+            if (IP_IS_FRAGMENT(nh->ip_frag_off)) {
+                flow->tos_frag |= FLOW_FRAG_ANY;
+                if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
+                    flow->tos_frag |= FLOW_FRAG_LATER;
+                }
+            }
+
+            if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) {
                 if (flow->nw_proto == IPPROTO_TCP) {
                     parse_tcp(packet, &b, flow);
                 } else if (flow->nw_proto == IPPROTO_UDP) {
@@ -378,15 +389,11 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port,
                         packet->l7 = b.data;
                     }
                 }
-            } else {
-                retval = 1;
             }
         }
     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
-
-        retval = parse_ipv6(&b, flow);
-        if (retval) {
-            return 0;
+        if (parse_ipv6(&b, flow)) {
+            return;
         }
 
         packet->l4 = b.data;
@@ -419,8 +426,6 @@ flow_extract(struct ofpbuf *packet, ovs_be64 tun_id, uint16_t ofp_in_port,
             }
         }
     }
-
-    return retval;
 }
 
 /* For every bit of a field that is wildcarded in 'wildcards', sets the
@@ -431,7 +436,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
     const flow_wildcards_t wc = wildcards->wildcards;
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
 
     for (i = 0; i < FLOW_N_REGS; i++) {
         flow->regs[i] &= wildcards->reg_masks[i];
@@ -465,9 +470,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
     if (wc & FWW_NW_PROTO) {
         flow->nw_proto = 0;
     }
-    if (wc & FWW_NW_TOS) {
-        flow->nw_tos = 0;
-    }
+    flow->tos_frag &= wildcards->tos_frag_mask;
     if (wc & FWW_ARP_SHA) {
         memset(flow->arp_sha, 0, sizeof flow->arp_sha);
     }
@@ -494,6 +497,8 @@ flow_to_string(const struct flow *flow)
 void
 flow_format(struct ds *ds, const struct flow *flow)
 {
+    int frag;
+
     ds_put_format(ds, "tunnel%#"PRIx64":in_port%04"PRIx16":tci(",
                   ntohll(flow->tun_id), flow->in_port);
     if (flow->vlan_tci) {
@@ -511,7 +516,7 @@ flow_format(struct ds *ds, const struct flow *flow)
 
     if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
         ds_put_format(ds, " proto%"PRIu8" tos%"PRIu8" ipv6",
-                      flow->nw_proto, flow->nw_tos);
+                      flow->nw_proto, flow->tos_frag & IP_DSCP_MASK);
         print_ipv6_addr(ds, &flow->ipv6_src);
         ds_put_cstr(ds, "->");
         print_ipv6_addr(ds, &flow->ipv6_dst);
@@ -521,10 +526,17 @@ flow_format(struct ds *ds, const struct flow *flow)
                           " tos%"PRIu8
                           " ip"IP_FMT"->"IP_FMT,
                       flow->nw_proto,
-                      flow->nw_tos,
+                      flow->tos_frag & IP_DSCP_MASK,
                       IP_ARGS(&flow->nw_src),
                       IP_ARGS(&flow->nw_dst));
     }
+    frag = flow->tos_frag & FLOW_FRAG_MASK;
+    if (frag) {
+        ds_put_format(ds, " frag(%s)",
+                      frag == FLOW_FRAG_ANY ? "first"
+                      : frag == (FLOW_FRAG_ANY | FLOW_FRAG_LATER) ? "later"
+                      : "<error>");
+    }
     if (flow->tp_src || flow->tp_dst) {
         ds_put_format(ds, " port%"PRIu16"->%"PRIu16,
                 ntohs(flow->tp_src), ntohs(flow->tp_dst));
@@ -550,6 +562,8 @@ flow_print(FILE *stream, const struct flow *flow)
 void
 flow_wildcards_init_catchall(struct flow_wildcards *wc)
 {
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
+
     wc->wildcards = FWW_ALL;
     wc->tun_id_mask = htonll(0);
     wc->nw_src_mask = htonl(0);
@@ -558,6 +572,7 @@ flow_wildcards_init_catchall(struct flow_wildcards *wc)
     wc->ipv6_dst_mask = in6addr_any;
     memset(wc->reg_masks, 0, sizeof wc->reg_masks);
     wc->vlan_tci_mask = htons(0);
+    wc->tos_frag_mask = 0;
     memset(wc->zeros, 0, sizeof wc->zeros);
 }
 
@@ -566,6 +581,8 @@ flow_wildcards_init_catchall(struct flow_wildcards *wc)
 void
 flow_wildcards_init_exact(struct flow_wildcards *wc)
 {
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
+
     wc->wildcards = 0;
     wc->tun_id_mask = htonll(UINT64_MAX);
     wc->nw_src_mask = htonl(UINT32_MAX);
@@ -574,6 +591,7 @@ flow_wildcards_init_exact(struct flow_wildcards *wc)
     wc->ipv6_dst_mask = in6addr_exact;
     memset(wc->reg_masks, 0xff, sizeof wc->reg_masks);
     wc->vlan_tci_mask = htons(UINT16_MAX);
+    wc->tos_frag_mask = UINT8_MAX;
     memset(wc->zeros, 0, sizeof wc->zeros);
 }
 
@@ -584,7 +602,7 @@ flow_wildcards_is_exact(const struct flow_wildcards *wc)
 {
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
 
     if (wc->wildcards
         || wc->tun_id_mask != htonll(UINT64_MAX)
@@ -592,7 +610,8 @@ flow_wildcards_is_exact(const struct flow_wildcards *wc)
         || wc->nw_dst_mask != htonl(UINT32_MAX)
         || wc->vlan_tci_mask != htons(UINT16_MAX)
         || !ipv6_mask_is_exact(&wc->ipv6_src_mask)
-        || !ipv6_mask_is_exact(&wc->ipv6_dst_mask)) {
+        || !ipv6_mask_is_exact(&wc->ipv6_dst_mask)
+        || wc->tos_frag_mask != UINT8_MAX) {
         return false;
     }
 
@@ -612,7 +631,7 @@ flow_wildcards_is_catchall(const struct flow_wildcards *wc)
 {
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
 
     if (wc->wildcards != FWW_ALL
         || wc->tun_id_mask != htonll(0)
@@ -620,7 +639,8 @@ flow_wildcards_is_catchall(const struct flow_wildcards *wc)
         || wc->nw_dst_mask != htonl(0)
         || wc->vlan_tci_mask != htons(0)
         || !ipv6_mask_is_any(&wc->ipv6_src_mask)
-        || !ipv6_mask_is_any(&wc->ipv6_dst_mask)) {
+        || !ipv6_mask_is_any(&wc->ipv6_dst_mask)
+        || wc->tos_frag_mask != 0) {
         return false;
     }
 
@@ -982,29 +1002,38 @@ flow_compose(struct ofpbuf *b, const struct flow *flow)
 
         b->l3 = ip = ofpbuf_put_zeros(b, sizeof *ip);
         ip->ip_ihl_ver = IP_IHL_VER(5, 4);
-        ip->ip_tos = flow->nw_tos;
+        ip->ip_tos = flow->tos_frag & IP_DSCP_MASK;
         ip->ip_proto = flow->nw_proto;
         ip->ip_src = flow->nw_src;
         ip->ip_dst = flow->nw_dst;
 
-        if (flow->nw_proto == IPPROTO_TCP) {
-            struct tcp_header *tcp;
-
-            b->l4 = tcp = ofpbuf_put_zeros(b, sizeof *tcp);
-            tcp->tcp_src = flow->tp_src;
-            tcp->tcp_dst = flow->tp_dst;
-        } else if (flow->nw_proto == IPPROTO_UDP) {
-            struct udp_header *udp;
-
-            b->l4 = udp = ofpbuf_put_zeros(b, sizeof *udp);
-            udp->udp_src = flow->tp_src;
-            udp->udp_dst = flow->tp_dst;
-        } else if (flow->nw_proto == IPPROTO_ICMP) {
-            struct icmp_header *icmp;
-
-            b->l4 = icmp = ofpbuf_put_zeros(b, sizeof *icmp);
-            icmp->icmp_type = ntohs(flow->tp_src);
-            icmp->icmp_code = ntohs(flow->tp_dst);
+        if (flow->tos_frag & FLOW_FRAG_ANY) {
+            ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
+            if (flow->tos_frag & FLOW_FRAG_LATER) {
+                ip->ip_frag_off |= htons(100);
+            }
+        }
+        if (!(flow->tos_frag & FLOW_FRAG_ANY)
+            || !(flow->tos_frag & FLOW_FRAG_LATER)) {
+            if (flow->nw_proto == IPPROTO_TCP) {
+                struct tcp_header *tcp;
+
+                b->l4 = tcp = ofpbuf_put_zeros(b, sizeof *tcp);
+                tcp->tcp_src = flow->tp_src;
+                tcp->tcp_dst = flow->tp_dst;
+            } else if (flow->nw_proto == IPPROTO_UDP) {
+                struct udp_header *udp;
+
+                b->l4 = udp = ofpbuf_put_zeros(b, sizeof *udp);
+                udp->udp_src = flow->tp_src;
+                udp->udp_dst = flow->tp_dst;
+            } else if (flow->nw_proto == IPPROTO_ICMP) {
+                struct icmp_header *icmp;
+
+                b->l4 = icmp = ofpbuf_put_zeros(b, sizeof *icmp);
+                icmp->icmp_type = ntohs(flow->tp_src);
+                icmp->icmp_code = ntohs(flow->tp_dst);
+            }
         }
     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
         /* XXX */
diff --git a/lib/flow.h b/lib/flow.h
index 736890a..e9da2ad 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -35,7 +35,7 @@ struct ofpbuf;
 /* This sequence number should be incremented whenever anything involving flows
  * or the wildcarding of flows changes.  This will cause build assertion
  * failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 2
+#define FLOW_WC_SEQ 3
 
 #define FLOW_N_REGS 5
 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
@@ -44,6 +44,14 @@ BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
  * type, that is, pure 802.2 frames. */
 #define FLOW_DL_TYPE_NONE 0x5ff
 
+/* Fragment bits, used for IPv4 and IPv6, always zero for non-IP flows. */
+#define FLOW_FRAG_ANY   (1 << 0) /* Set for any IP fragment. */
+#define FLOW_FRAG_LATER (1 << 1) /* Set for IP fragment with nonzero offset. */
+#define FLOW_FRAG_MASK  (FLOW_FRAG_ANY | FLOW_FRAG_LATER)
+
+BUILD_ASSERT_DECL(FLOW_FRAG_ANY == NX_IP_FRAG_ANY);
+BUILD_ASSERT_DECL(FLOW_FRAG_LATER == NX_IP_FRAG_LATER);
+
 struct flow {
     ovs_be64 tun_id;            /* Encapsulating tunnel ID. */
     uint32_t regs[FLOW_N_REGS]; /* Registers. */
@@ -57,7 +65,7 @@ struct flow {
     uint8_t dl_src[6];          /* Ethernet source address. */
     uint8_t dl_dst[6];          /* Ethernet destination address. */
     uint8_t nw_proto;           /* IP protocol or low 8 bits of ARP opcode. */
-    uint8_t nw_tos;             /* IP ToS (DSCP field, 6 bits). */
+    uint8_t tos_frag;           /* IP ToS in top bits, FLOW_FRAG_* in low. */
     uint8_t arp_sha[6];         /* ARP/ND source hardware address. */
     uint8_t arp_tha[6];         /* ARP/ND target hardware address. */
     struct in6_addr ipv6_src;   /* IPv6 source address. */
@@ -74,10 +82,10 @@ BUILD_ASSERT_DECL(sizeof(((struct flow *)0)->nd_target) == 16);
 BUILD_ASSERT_DECL(sizeof(struct flow) == FLOW_SIG_SIZE + FLOW_PAD_SIZE);
 
 /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
-BUILD_ASSERT_DECL(FLOW_SIG_SIZE == 120 && FLOW_WC_SEQ == 2);
+BUILD_ASSERT_DECL(FLOW_SIG_SIZE == 120 && FLOW_WC_SEQ == 3);
 
-int flow_extract(struct ofpbuf *, ovs_be64 tun_id, uint16_t in_port,
-                 struct flow *);
+void flow_extract(struct ofpbuf *, ovs_be64 tun_id, uint16_t in_port,
+                  struct flow *);
 void flow_zero_wildcards(struct flow *, const struct flow_wildcards *);
 
 char *flow_to_string(const struct flow *);
@@ -124,18 +132,16 @@ typedef unsigned int OVS_BITWISE flow_wildcards_t;
 #define FWW_NW_PROTO    ((OVS_FORCE flow_wildcards_t) (1 << 5))
 #define FWW_TP_SRC      ((OVS_FORCE flow_wildcards_t) (1 << 6))
 #define FWW_TP_DST      ((OVS_FORCE flow_wildcards_t) (1 << 7))
-/* Same meanings as corresponding OFPFW_* bits, but differ in value. */
-#define FWW_NW_TOS      ((OVS_FORCE flow_wildcards_t) (1 << 1))
 /* No corresponding OFPFW_* bits. */
-#define FWW_ETH_MCAST   ((OVS_FORCE flow_wildcards_t) (1 << 8))
+#define FWW_ETH_MCAST   ((OVS_FORCE flow_wildcards_t) (1 << 1))
                                                        /* multicast bit only */
-#define FWW_ARP_SHA     ((OVS_FORCE flow_wildcards_t) (1 << 9))
-#define FWW_ARP_THA     ((OVS_FORCE flow_wildcards_t) (1 << 10))
-#define FWW_ND_TARGET   ((OVS_FORCE flow_wildcards_t) (1 << 11))
-#define FWW_ALL         ((OVS_FORCE flow_wildcards_t) (((1 << 12)) - 1))
+#define FWW_ARP_SHA     ((OVS_FORCE flow_wildcards_t) (1 << 8))
+#define FWW_ARP_THA     ((OVS_FORCE flow_wildcards_t) (1 << 9))
+#define FWW_ND_TARGET   ((OVS_FORCE flow_wildcards_t) (1 << 10))
+#define FWW_ALL         ((OVS_FORCE flow_wildcards_t) (((1 << 11)) - 1))
 
 /* Remember to update FLOW_WC_SEQ when adding or removing FWW_*. */
-BUILD_ASSERT_DECL(FWW_ALL == ((1 << 12) - 1) && FLOW_WC_SEQ == 2);
+BUILD_ASSERT_DECL(FWW_ALL == ((1 << 11) - 1) && FLOW_WC_SEQ == 3);
 
 /* Information on wildcards for a flow, as a supplement to "struct flow".
  *
@@ -150,11 +156,12 @@ struct flow_wildcards {
     struct in6_addr ipv6_src_mask; /* 1-bit in each signficant ipv6_src bit. */
     struct in6_addr ipv6_dst_mask; /* 1-bit in each signficant ipv6_dst bit. */
     ovs_be16 vlan_tci_mask;     /* 1-bit in each significant vlan_tci bit. */
-    uint8_t zeros[6];           /* Padding field set to zero. */
+    uint8_t tos_frag_mask;      /* 1-bit in each significant tos_frag bit. */
+    uint8_t zeros[5];           /* Padding field set to zero. */
 };
 
 /* Remember to update FLOW_WC_SEQ when updating struct flow_wildcards. */
-BUILD_ASSERT_DECL(sizeof(struct flow_wildcards) == 80 && FLOW_WC_SEQ == 2);
+BUILD_ASSERT_DECL(sizeof(struct flow_wildcards) == 80 && FLOW_WC_SEQ == 3);
 
 void flow_wildcards_init_catchall(struct flow_wildcards *);
 void flow_wildcards_init_exact(struct flow_wildcards *);
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index d5226f0..0f00996 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -182,10 +182,17 @@ static const struct mf_field mf_fields[MFF_N_IDS] = {
     }, {
         MFF_IP_TOS, "nw_tos", NULL,
         MF_FIELD_SIZES(u8),
-        MFM_NONE, FWW_NW_TOS,
+        MFM_NONE, 0,
         MFS_DECIMAL,
         MFP_IP_ANY,
         NXM_OF_IP_TOS,
+    }, {
+        MFF_IP_FRAG, "ip_frag", NULL,
+        1, 2,
+        MFM_FULLY, 0,
+        MFS_FRAG,
+        MFP_IP_ANY,
+        NXM_NX_IP_FRAG,
     },
 
     {
@@ -347,7 +354,6 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc)
     case MFF_ETH_SRC:
     case MFF_ETH_TYPE:
     case MFF_IP_PROTO:
-    case MFF_IP_TOS:
     case MFF_ARP_OP:
     case MFF_ARP_SHA:
     case MFF_ARP_THA:
@@ -407,6 +413,11 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc)
     case MFF_IPV6_DST:
         return ipv6_mask_is_any(&wc->ipv6_dst_mask);
 
+    case MFF_IP_TOS:
+        return !(wc->tos_frag_mask & IP_DSCP_MASK);
+    case MFF_IP_FRAG:
+        return !(wc->tos_frag_mask & FLOW_FRAG_MASK);
+
     case MFF_ARP_SPA:
         return !wc->nw_src_mask;
     case MFF_ARP_TPA:
@@ -433,7 +444,6 @@ mf_get_mask(const struct mf_field *mf, const struct flow_wildcards *wc,
     case MFF_ETH_SRC:
     case MFF_ETH_TYPE:
     case MFF_IP_PROTO:
-    case MFF_IP_TOS:
     case MFF_ARP_OP:
     case MFF_ARP_SHA:
     case MFF_ARP_THA:
@@ -504,6 +514,13 @@ mf_get_mask(const struct mf_field *mf, const struct flow_wildcards *wc,
         mask->ipv6 = wc->ipv6_dst_mask;
         break;
 
+    case MFF_IP_TOS:
+        mask->u8 = wc->tos_frag_mask & IP_DSCP_MASK;
+        break;
+    case MFF_IP_FRAG:
+        mask->u8 = wc->tos_frag_mask & FLOW_FRAG_MASK;
+        break;
+
     case MFF_ARP_SPA:
         mask->be32 = wc->nw_src_mask;
         break;
@@ -666,7 +683,9 @@ mf_is_value_valid(const struct mf_field *mf, const union mf_value *value)
         return true;
 
     case MFF_IP_TOS:
-        return !(value->u8 & 0x03);
+        return !(value->u8 & ~IP_DSCP_MASK);
+    case MFF_IP_FRAG:
+        return !(value->u8 & ~FLOW_FRAG_MASK);
 
     case MFF_ARP_OP:
         return !(value->be16 & htons(0xff00));
@@ -764,7 +783,11 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow,
         break;
 
     case MFF_IP_TOS:
-        value->u8 = flow->nw_tos;
+        value->u8 = flow->tos_frag & IP_DSCP_MASK;
+        break;
+
+    case MFF_IP_FRAG:
+        value->u8 = flow->tos_frag & FLOW_FRAG_MASK;
         break;
 
     case MFF_ARP_OP:
@@ -910,6 +933,10 @@ mf_set_value(const struct mf_field *mf,
         cls_rule_set_nw_tos(rule, value->u8);
         break;
 
+    case MFF_IP_FRAG:
+        cls_rule_set_frag(rule, value->u8);
+        break;
+
     case MFF_ARP_OP:
         cls_rule_set_nw_proto(rule, ntohs(value->be16));
         break;
@@ -1065,8 +1092,13 @@ mf_set_wild(const struct mf_field *mf, struct cls_rule *rule)
         break;
 
     case MFF_IP_TOS:
-        rule->wc.wildcards |= FWW_NW_TOS;
-        rule->flow.nw_tos = 0;
+        rule->wc.tos_frag_mask &= ~IP_DSCP_MASK;  /* 0-bits = wildcard. */
+        rule->flow.tos_frag &= ~IP_DSCP_MASK;
+        break;
+
+    case MFF_IP_FRAG:
+        rule->wc.tos_frag_mask &= ~FLOW_FRAG_MASK; /* 0-bits = wildcard. */
+        rule->flow.tos_frag &= ~FLOW_FRAG_MASK;
         break;
 
     case MFF_ARP_OP:
@@ -1209,6 +1241,10 @@ mf_set(const struct mf_field *mf,
         cls_rule_set_ipv6_dst_masked(rule, &value->ipv6, &mask->ipv6);
         break;
 
+    case MFF_IP_FRAG:
+        cls_rule_set_frag_masked(rule, value->u8, mask->u8);
+        break;
+
     case MFF_ARP_SPA:
         cls_rule_set_nw_src_masked(rule, value->be32, mask->be32);
         break;
@@ -1361,6 +1397,10 @@ mf_random_value(const struct mf_field *mf, union mf_value *value)
         value->u8 &= ~0x03;
         break;
 
+    case MFF_IP_FRAG:
+        value->u8 &= FLOW_FRAG_MASK;
+        break;
+
     case MFF_ARP_OP:
         value->be16 &= htons(0xff);
         break;
@@ -1524,6 +1564,49 @@ mf_from_ofp_port_string(const struct mf_field *mf, const char *s,
     }
 }
 
+struct frag_handling {
+    const char *name;           /* Name parsed and printed for ip_frag. */
+    uint8_t mask;               /* FLOW_FRAG_* bits that are matched. */
+    uint8_t value;              /* Required value of the bits in 'mask'. */
+};
+
+static const struct frag_handling all_frags[] = {
+#define A FLOW_FRAG_ANY
+#define L FLOW_FRAG_LATER
+    /* name               mask  value */
+
+    { "no",               A|L,  0     }, /* Not a fragment. */
+    { "first",            A|L,  A     }, /* Fragment, offset zero. */
+    { "later",            A|L,  A|L   }, /* Fragment, nonzero offset. */
+
+    { "no",               A,    0     }, /* Duplicate name: format only. */
+    { "yes",              A,    A     }, /* Any fragment. */
+
+    { "not_later",        L,    0     }, /* Offset zero, fragment or not. */
+    { "later",            L,    L     }, /* Duplicate name: format only. */
+#undef A
+#undef L
+};
+
+static char *
+mf_from_frag_string(const char *s, uint8_t *valuep, uint8_t *maskp)
+{
+    const struct frag_handling *h;
+
+    for (h = all_frags; h < &all_frags[ARRAY_SIZE(all_frags)]; h++) {
+        if (!strcasecmp(s, h->name)) {      /* First entry named 's' wins. */
+            /* We force the upper bits of the mask on to make mf_parse_value()
+             * happy (otherwise it will never think it's an exact match.) */
+            *maskp = h->mask | ~FLOW_FRAG_MASK;
+            *valuep = h->value;
+            return NULL;                    /* NULL reports success. */
+        }
+    }
+
+    return xasprintf("%s: unknown fragment type (valid types are \"no\", "
+                     "\"yes\", \"first\", \"later\", \"not_later\")", s);
+}
+
 /* Parses 's', a string value for field 'mf', into 'value' and 'mask'.  Returns
  * NULL if successful, otherwise a malloc()'d string describing the error. */
 char *
@@ -1553,6 +1636,9 @@ mf_parse(const struct mf_field *mf, const char *s,
 
     case MFS_OFP_PORT:
         return mf_from_ofp_port_string(mf, s, &value->be16, &mask->be16);
+
+    case MFS_FRAG:
+        return mf_from_frag_string(s, &value->u8, &mask->u8);
     }
     NOT_REACHED();
 }
@@ -1610,6 +1696,26 @@ mf_format_integer_string(const struct mf_field *mf, const uint8_t *valuep,
     }
 }
 
+static void
+mf_format_frag_string(const uint8_t *valuep, const uint8_t *maskp,
+                      struct ds *s)
+{
+    const struct frag_handling *h;
+    uint8_t value = *valuep;
+    uint8_t mask = maskp ? *maskp : UINT8_MAX; /* NULL means exact match. */
+
+    value &= mask;
+    mask &= FLOW_FRAG_MASK;         /* Only the fragment bits pick a name. */
+
+    for (h = all_frags; h < &all_frags[ARRAY_SIZE(all_frags)]; h++) {
+        if (value == h->value && mask == h->mask) {
+            ds_put_cstr(s, h->name);
+            return;
+        }
+    }
+    ds_put_cstr(s, "<error>");      /* No table entry has this value/mask. */
+}
+
 /* Appends to 's' a string representation of field 'mf' whose value is in
  * 'value' and 'mask'.  'mask' may be NULL to indicate an exact match. */
 void
@@ -1654,6 +1760,10 @@ mf_format(const struct mf_field *mf,
         print_ipv6_masked(s, &value->ipv6, mask ? &mask->ipv6 : NULL);
         break;
 
+    case MFS_FRAG:
+        mf_format_frag_string(&value->u8, &mask->u8, s);
+        break;
+
     default:
         NOT_REACHED();
     }
diff --git a/lib/meta-flow.h b/lib/meta-flow.h
index f2508d5..696b8ca 100644
--- a/lib/meta-flow.h
+++ b/lib/meta-flow.h
@@ -68,6 +68,7 @@ enum mf_field_id {
 
     MFF_IP_PROTO,               /* u8 (used for IPv4 or IPv6) */
     MFF_IP_TOS,                 /* u8 (used for IPv4 or IPv6) */
+    MFF_IP_FRAG,                /* u8 (used for IPv4 or IPv6) */
 
     MFF_ARP_OP,                 /* be16 */
     MFF_ARP_SPA,                /* be32 */
@@ -142,7 +143,8 @@ enum mf_string {
     MFS_ETHERNET,
     MFS_IPV4,
     MFS_IPV6,
-    MFS_OFP_PORT                /* An OpenFlow port number or name. */
+    MFS_OFP_PORT,               /* An OpenFlow port number or name. */
+    MFS_FRAG                    /* no, yes, first, later, not_later */
 };
 
 struct mf_field {
diff --git a/lib/nx-match.c b/lib/nx-match.c
index a850be2..84a14de 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -266,6 +266,24 @@ nxm_put_8(struct ofpbuf *b, uint32_t header, uint8_t value)
 }
 
 static void
+nxm_put_8m(struct ofpbuf *b, uint32_t header, uint8_t value, uint8_t mask)
+{
+    switch (mask) {
+    case 0:                     /* No bit is matched: emit nothing. */
+        break;
+
+    case UINT8_MAX:             /* Every bit is matched: unmasked entry. */
+        nxm_put_8(b, header, value);
+        break;
+
+    default:                    /* Partial mask: wild header, value, mask. */
+        nxm_put_header(b, NXM_MAKE_WILD_HEADER(header));
+        ofpbuf_put(b, &value, sizeof value);
+        ofpbuf_put(b, &mask, sizeof mask);
+    }
+}
+
+static void
 nxm_put_16(struct ofpbuf *b, uint32_t header, ovs_be16 value)
 {
     nxm_put_header(b, header);
@@ -403,6 +421,32 @@ nxm_put_ipv6(struct ofpbuf *b, uint32_t header,
     }
 }
 
+static void
+nxm_put_tos_frag(struct ofpbuf *b, const struct cls_rule *cr)
+{
+    uint8_t tos_frag = cr->flow.tos_frag;
+    uint8_t tos_frag_mask = cr->wc.tos_frag_mask;
+
+    if (tos_frag_mask & IP_DSCP_MASK) {    /* ToS is emitted unmasked. */
+        nxm_put_8(b, NXM_OF_IP_TOS, tos_frag & IP_DSCP_MASK);
+    }
+
+    switch (tos_frag_mask & FLOW_FRAG_MASK) {
+    case 0:                         /* Fragment bits ignored: emit nothing. */
+        break;
+
+    case FLOW_FRAG_MASK:
+        /* Output it as exact-match even though only the low 2 bits matter. */
+        nxm_put_8(b, NXM_NX_IP_FRAG, tos_frag & FLOW_FRAG_MASK);
+        break;
+
+    default:                        /* Exactly one fragment bit is matched. */
+        nxm_put_8m(b, NXM_NX_IP_FRAG, tos_frag & FLOW_FRAG_MASK,
+                   tos_frag_mask & FLOW_FRAG_MASK);
+        break;
+    }
+}
+
 /* Appends to 'b' the nx_match format that expresses 'cr' (except for
  * 'cr->priority', because priority is not part of nx_match), plus enough
  * zero bytes to pad the nx_match out to a multiple of 8.
@@ -422,7 +466,7 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr)
     int match_len;
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
 
     /* Metadata. */
     if (!(wc & FWW_IN_PORT)) {
@@ -446,9 +490,7 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr)
     /* L3. */
     if (!(wc & FWW_DL_TYPE) && flow->dl_type == htons(ETH_TYPE_IP)) {
         /* IP. */
-        if (!(wc & FWW_NW_TOS)) {
-            nxm_put_8(b, NXM_OF_IP_TOS, flow->nw_tos & 0xfc);
-        }
+        nxm_put_tos_frag(b, cr);
         nxm_put_32m(b, NXM_OF_IP_SRC, flow->nw_src, cr->wc.nw_src_mask);
         nxm_put_32m(b, NXM_OF_IP_DST, flow->nw_dst, cr->wc.nw_dst_mask);
 
@@ -488,10 +530,7 @@ nx_put_match(struct ofpbuf *b, const struct cls_rule *cr)
         }
     } else if (!(wc & FWW_DL_TYPE) && flow->dl_type == htons(ETH_TYPE_IPV6)) {
         /* IPv6. */
-
-        if (!(wc & FWW_NW_TOS)) {
-            nxm_put_8(b, NXM_OF_IP_TOS, flow->nw_tos & 0xfc);
-        }
+        nxm_put_tos_frag(b, cr);
         nxm_put_ipv6(b, NXM_NX_IPV6_SRC, &flow->ipv6_src,
                 &cr->wc.ipv6_src_mask);
         nxm_put_ipv6(b, NXM_NX_IPV6_DST, &flow->ipv6_dst,
@@ -1000,7 +1039,10 @@ nxm_read_field(const struct nxm_field *src, const struct flow *flow)
         return ntohs(flow->vlan_tci);
 
     case NFI_NXM_OF_IP_TOS:
-        return flow->nw_tos;
+        return flow->tos_frag & IP_DSCP_MASK;
+
+    case NFI_NXM_NX_IP_FRAG:
+        return flow->tos_frag & FLOW_FRAG_MASK;
 
     case NFI_NXM_OF_IP_PROTO:
     case NFI_NXM_OF_ARP_OP:
@@ -1075,6 +1117,7 @@ nxm_read_field(const struct nxm_field *src, const struct flow *flow)
     case NFI_NXM_NX_IPV6_SRC_W:
     case NFI_NXM_NX_IPV6_DST:
     case NFI_NXM_NX_IPV6_DST_W:
+    case NFI_NXM_NX_IP_FRAG_W:
     case NFI_NXM_NX_ND_TARGET:
     case N_NXM_FIELDS:
         NOT_REACHED();
@@ -1146,7 +1189,13 @@ nxm_write_field(const struct nxm_field *dst, struct flow *flow,
 #endif
 
     case NFI_NXM_OF_IP_TOS:
-        flow->nw_tos = new_value & IP_DSCP_MASK;
+        flow->tos_frag &= ~IP_DSCP_MASK;
+        flow->tos_frag |= new_value & IP_DSCP_MASK;
+        break;
+
+    case NFI_NXM_NX_IP_FRAG:
+        flow->tos_frag &= ~FLOW_FRAG_MASK;
+        flow->tos_frag |= new_value & FLOW_FRAG_MASK;
         break;
 
     case NFI_NXM_OF_IP_SRC:
@@ -1188,6 +1237,7 @@ nxm_write_field(const struct nxm_field *dst, struct flow *flow,
     case NFI_NXM_NX_IPV6_SRC_W:
     case NFI_NXM_NX_IPV6_DST:
     case NFI_NXM_NX_IPV6_DST_W:
+    case NFI_NXM_NX_IP_FRAG_W:
     case NFI_NXM_NX_ICMPV6_TYPE:
     case NFI_NXM_NX_ICMPV6_CODE:
     case NFI_NXM_NX_ND_TARGET:
diff --git a/lib/nx-match.def b/lib/nx-match.def
index 3f2882c..5c0a238 100644
--- a/lib/nx-match.def
+++ b/lib/nx-match.def
@@ -30,6 +30,7 @@ DEFINE_FIELD  (OF_IP_TOS,     MFF_IP_TOS,     true)
 DEFINE_FIELD  (OF_IP_PROTO,   MFF_IP_PROTO,  false)
 DEFINE_FIELD_M(OF_IP_SRC,     MFF_IPV4_SRC,   true)
 DEFINE_FIELD_M(OF_IP_DST,     MFF_IPV4_DST,   true)
+DEFINE_FIELD_M(NX_IP_FRAG,    MFF_IP_FRAG,   false)
 DEFINE_FIELD  (OF_TCP_SRC,    MFF_TCP_SRC,    true)
 DEFINE_FIELD  (OF_TCP_DST,    MFF_TCP_DST,    true)
 DEFINE_FIELD  (OF_UDP_SRC,    MFF_UDP_SRC,    true)
diff --git a/lib/odp-util.c b/lib/odp-util.c
index a471099..a7a6a1b 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -321,6 +321,22 @@ format_generic_odp_key(const struct nlattr *a, struct ds *ds)
     }
 }
 
+static const char *
+ovs_frag_type_to_string(enum ovs_frag_type type)
+{
+    switch (type) {             /* Returns a string literal in every case. */
+    case OVS_FRAG_TYPE_NONE:
+        return "no";
+    case OVS_FRAG_TYPE_FIRST:
+        return "first";
+    case OVS_FRAG_TYPE_LATER:
+        return "later";
+    case __OVS_FRAG_TYPE_MAX:
+    default:                    /* Anything else is not a valid type. */
+        return "<error>";
+    }
+}
+
 static void
 format_odp_key_attr(const struct nlattr *a, struct ds *ds)
 {
@@ -378,10 +394,11 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
     case OVS_KEY_ATTR_IPV4:
         ipv4_key = nl_attr_get(a);
         ds_put_format(ds, "ipv4(src="IP_FMT",dst="IP_FMT","
-                      "proto=%"PRId8",tos=%"PRIu8")",
+                      "proto=%"PRId8",tos=%"PRIu8",frag=%s)",
                       IP_ARGS(&ipv4_key->ipv4_src),
                       IP_ARGS(&ipv4_key->ipv4_dst),
-                      ipv4_key->ipv4_proto, ipv4_key->ipv4_tos);
+                      ipv4_key->ipv4_proto, ipv4_key->ipv4_tos,
+                      ovs_frag_type_to_string(ipv4_key->ipv4_frag));
         break;
 
     case OVS_KEY_ATTR_IPV6: {
@@ -392,9 +409,11 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
         inet_ntop(AF_INET6, ipv6_key->ipv6_src, src_str, sizeof src_str);
         inet_ntop(AF_INET6, ipv6_key->ipv6_dst, dst_str, sizeof dst_str);
 
-        ds_put_format(ds, "ipv6(src=%s,dst=%s,proto=%"PRId8",tos=%"PRIu8")",
+        ds_put_format(ds, "ipv6(src=%s,dst=%s,proto=%"PRId8",tos=%"PRIu8","
+                      "frag=%s)",
                       src_str, dst_str, ipv6_key->ipv6_proto,
-                      ipv6_key->ipv6_tos);
+                      ipv6_key->ipv6_tos,
+                      ovs_frag_type_to_string(ipv6_key->ipv6_frag));
         break;
     }
 
@@ -502,6 +521,21 @@ put_nd_key(int n, const char *nd_target_s,
     return n;
 }
 
+static bool
+ovs_frag_type_from_string(const char *s, enum ovs_frag_type *type)
+{
+    if (!strcasecmp(s, "no")) {             /* Names are case-insensitive. */
+        *type = OVS_FRAG_TYPE_NONE;
+    } else if (!strcasecmp(s, "first")) {
+        *type = OVS_FRAG_TYPE_FIRST;
+    } else if (!strcasecmp(s, "later")) {
+        *type = OVS_FRAG_TYPE_LATER;
+    } else {
+        return false;                       /* Unknown; '*type' untouched. */
+    }
+    return true;
+}
+
 static int
 parse_odp_key_attr(const char *s, struct ofpbuf *key)
 {
@@ -586,13 +620,16 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key)
         ovs_be32 ipv4_dst;
         int ipv4_proto;
         int ipv4_tos;
+        char frag[8];
+        enum ovs_frag_type ipv4_frag;
         int n = -1;
 
         if (sscanf(s, "ipv4(src="IP_SCAN_FMT",dst="IP_SCAN_FMT","
-                   "proto=%i,tos=%i)%n",
-                   IP_SCAN_ARGS(&ipv4_src),
-                   IP_SCAN_ARGS(&ipv4_dst), &ipv4_proto, &ipv4_tos, &n) > 0
-            && n > 0) {
+                   "proto=%i,tos=%i,frag=%7[a-z])%n",
+                   IP_SCAN_ARGS(&ipv4_src), IP_SCAN_ARGS(&ipv4_dst),
+                   &ipv4_proto, &ipv4_tos, frag, &n) > 0
+            && n > 0
+            && ovs_frag_type_from_string(frag, &ipv4_frag)) {
             struct ovs_key_ipv4 ipv4_key;
 
             memset(&ipv4_key, 0, sizeof ipv4_key);
@@ -600,6 +637,7 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key)
             ipv4_key.ipv4_dst = ipv4_dst;
             ipv4_key.ipv4_proto = ipv4_proto;
             ipv4_key.ipv4_tos = ipv4_tos;
+            ipv4_key.ipv4_frag = ipv4_frag;
             nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV4,
                               &ipv4_key, sizeof ipv4_key);
             return n;
@@ -611,12 +649,16 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key)
         char ipv6_dst_s[IPV6_SCAN_LEN + 1];
         int ipv6_proto;
         int ipv6_tos;
+        char frag[8];
+        enum ovs_frag_type ipv6_frag;
         int n = -1;
 
         if (sscanf(s, "ipv6(src="IPV6_SCAN_FMT",dst="IPV6_SCAN_FMT","
-                   "proto=%i,tos=%i)%n",
+                   "proto=%i,tos=%i,frag=%7[a-z])%n",
                    ipv6_src_s, ipv6_dst_s,
-                   &ipv6_proto, &ipv6_tos, &n) > 0 && n > 0) {
+                   &ipv6_proto, &ipv6_tos, frag, &n) > 0
+            && n > 0
+            && ovs_frag_type_from_string(frag, &ipv6_frag)) {
             struct ovs_key_ipv6 ipv6_key;
 
             memset(&ipv6_key, 0, sizeof ipv6_key);
@@ -626,6 +668,7 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key)
             }
             ipv6_key.ipv6_proto = ipv6_proto;
             ipv6_key.ipv6_tos = ipv6_tos;
+            ipv6_key.ipv6_frag = ipv6_frag;
             nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV6,
                               &ipv6_key, sizeof ipv6_key);
             return n;
@@ -789,6 +832,14 @@ odp_flow_key_from_string(const char *s, struct ofpbuf *key)
     return 0;
 }
 
+static uint8_t
+tos_frag_to_odp_frag(uint8_t tos_frag)
+{   /* Converts FLOW_FRAG_* bits in 'tos_frag' to OVS_FRAG_TYPE_*. */
+    return (tos_frag & FLOW_FRAG_LATER ? OVS_FRAG_TYPE_LATER
+            : tos_frag & FLOW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST
+            : OVS_FRAG_TYPE_NONE);      /* Neither fragment bit is set. */
+}
+
 /* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'. */
 void
 odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
@@ -833,7 +884,8 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
         ipv4_key->ipv4_src = flow->nw_src;
         ipv4_key->ipv4_dst = flow->nw_dst;
         ipv4_key->ipv4_proto = flow->nw_proto;
-        ipv4_key->ipv4_tos = flow->nw_tos;
+        ipv4_key->ipv4_tos = flow->tos_frag & IP_DSCP_MASK;
+        ipv4_key->ipv4_frag = tos_frag_to_odp_frag(flow->tos_frag);
     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
         struct ovs_key_ipv6 *ipv6_key;
 
@@ -843,7 +895,8 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
         memcpy(ipv6_key->ipv6_src, &flow->ipv6_src, sizeof ipv6_key->ipv6_src);
         memcpy(ipv6_key->ipv6_dst, &flow->ipv6_dst, sizeof ipv6_key->ipv6_dst);
         ipv6_key->ipv6_proto = flow->nw_proto;
-        ipv6_key->ipv6_tos = flow->nw_tos;
+        ipv6_key->ipv6_tos = flow->tos_frag & IP_DSCP_MASK;
+        ipv6_key->ipv6_frag = tos_frag_to_odp_frag(flow->tos_frag);
     } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
         struct ovs_key_arp *arp_key;
 
@@ -857,8 +910,9 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
         memcpy(arp_key->arp_tha, flow->arp_tha, ETH_ADDR_LEN);
     }
 
-    if (flow->dl_type == htons(ETH_TYPE_IP)
-            || flow->dl_type == htons(ETH_TYPE_IPV6)) {
+    if ((flow->dl_type == htons(ETH_TYPE_IP)
+         || flow->dl_type == htons(ETH_TYPE_IPV6))
+        && !(flow->tos_frag & FLOW_FRAG_LATER)) {
 
         if (flow->nw_proto == IPPROTO_TCP) {
             struct ovs_key_tcp *tcp_key;
@@ -906,6 +960,23 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
     }
 }
 
+static bool
+odp_to_tos_frag(uint8_t odp_tos, uint8_t odp_frag, struct flow *flow)
+{   /* Stores 'odp_tos' and 'odp_frag' in 'flow->tos_frag'; false if bad. */
+    if (odp_tos & ~IP_DSCP_MASK || odp_frag > OVS_FRAG_TYPE_LATER) {
+        return false;   /* TOS bits outside the DSCP mask, or frag too big. */
+    }
+
+    flow->tos_frag = odp_tos;
+    if (odp_frag != OVS_FRAG_TYPE_NONE) {
+        flow->tos_frag |= FLOW_FRAG_ANY;    /* Set for any non-NONE type. */
+        if (odp_frag == OVS_FRAG_TYPE_LATER) {
+            flow->tos_frag |= FLOW_FRAG_LATER;
+        }
+    }
+    return true;
+}
+
 /* Converts the 'key_len' bytes of OVS_KEY_ATTR_* attributes in 'key' to a flow
  * structure in 'flow'.  Returns 0 if successful, otherwise EINVAL. */
 int
@@ -990,8 +1061,8 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
             flow->nw_src = ipv4_key->ipv4_src;
             flow->nw_dst = ipv4_key->ipv4_dst;
             flow->nw_proto = ipv4_key->ipv4_proto;
-            flow->nw_tos = ipv4_key->ipv4_tos;
-            if (flow->nw_tos & IP_ECN_MASK) {
+            if (!odp_to_tos_frag(ipv4_key->ipv4_tos, ipv4_key->ipv4_frag,
+                                 flow)) {
                 return EINVAL;
             }
             break;
@@ -1004,8 +1075,8 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
             memcpy(&flow->ipv6_src, ipv6_key->ipv6_src, sizeof flow->ipv6_src);
             memcpy(&flow->ipv6_dst, ipv6_key->ipv6_dst, sizeof flow->ipv6_dst);
             flow->nw_proto = ipv6_key->ipv6_proto;
-            flow->nw_tos = ipv6_key->ipv6_tos;
-            if (flow->nw_tos & IP_ECN_MASK) {
+            if (!odp_to_tos_frag(ipv6_key->ipv6_tos, ipv6_key->ipv6_frag,
+                                 flow)) {
                 return EINVAL;
             }
             break;
@@ -1105,6 +1176,9 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
         return 0;
 
     case OVS_KEY_ATTR_IPV4:
+        if (flow->tos_frag & FLOW_FRAG_LATER) {
+            return 0;
+        }
         if (flow->nw_proto == IPPROTO_TCP
             || flow->nw_proto == IPPROTO_UDP
             || flow->nw_proto == IPPROTO_ICMP) {
@@ -1113,6 +1187,9 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
         return 0;
 
     case OVS_KEY_ATTR_IPV6:
+        if (flow->tos_frag & FLOW_FRAG_LATER) {
+            return 0;
+        }
         if (flow->nw_proto == IPPROTO_TCP
             || flow->nw_proto == IPPROTO_UDP
             || flow->nw_proto == IPPROTO_ICMPV6) {
@@ -1122,7 +1199,8 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
 
     case OVS_KEY_ATTR_ICMPV6:
         if (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT)
-            || flow->tp_src == htons(ND_NEIGHBOR_ADVERT)) {
+            || flow->tp_src == htons(ND_NEIGHBOR_ADVERT)
+            || flow->tos_frag & FLOW_FRAG_LATER) {
             return EINVAL;
         }
         return 0;
@@ -1130,8 +1208,13 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
     case OVS_KEY_ATTR_TCP:
     case OVS_KEY_ATTR_UDP:
     case OVS_KEY_ATTR_ICMP:
-    case OVS_KEY_ATTR_ARP:
     case OVS_KEY_ATTR_ND:
+        if (flow->tos_frag & FLOW_FRAG_LATER) {
+            return EINVAL;
+        }
+        return 0;
+
+    case OVS_KEY_ATTR_ARP:
         return 0;
 
     case __OVS_KEY_ATTR_MAX:
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 64712b5..7bdbc22 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -604,21 +604,9 @@ ofp_print_switch_config(struct ds *string, const struct ofp_switch_config *osc)
 
     flags = ntohs(osc->flags);
 
-    ds_put_cstr(string, " frags=");
-    switch (flags & OFPC_FRAG_MASK) {
-    case OFPC_FRAG_NORMAL:
-        ds_put_cstr(string, "normal");
-        flags &= ~OFPC_FRAG_MASK;
-        break;
-    case OFPC_FRAG_DROP:
-        ds_put_cstr(string, "drop");
-        flags &= ~OFPC_FRAG_MASK;
-        break;
-    case OFPC_FRAG_REASM:
-        ds_put_cstr(string, "reassemble");
-        flags &= ~OFPC_FRAG_MASK;
-        break;
-    }
+    ds_put_format(string, " frags=%s", ofputil_frag_handling_to_string(flags));
+    flags &= ~OFPC_FRAG_MASK;
+
     if (flags) {
         ds_put_format(string, " ***unknown flags 0x%04"PRIx16"***", flags);
     }
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 0930196..328d0df 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -99,7 +99,7 @@ static const flow_wildcards_t WC_INVARIANTS = 0
 void
 ofputil_wildcard_from_openflow(uint32_t ofpfw, struct flow_wildcards *wc)
 {
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
 
     /* Initialize most of rule->wc. */
     flow_wildcards_init_catchall(wc);
@@ -108,9 +108,10 @@ ofputil_wildcard_from_openflow(uint32_t ofpfw, struct flow_wildcards *wc)
     /* Wildcard fields that aren't defined by ofp_match or tun_id. */
     wc->wildcards |= (FWW_ARP_SHA | FWW_ARP_THA | FWW_ND_TARGET);
 
-    if (ofpfw & OFPFW_NW_TOS) {
-        wc->wildcards |= FWW_NW_TOS;
+    if (!(ofpfw & OFPFW_NW_TOS)) {
+        wc->tos_frag_mask |= IP_DSCP_MASK;
     }
+
     wc->nw_src_mask = ofputil_wcbits_to_netmask(ofpfw >> OFPFW_NW_SRC_SHIFT);
     wc->nw_dst_mask = ofputil_wcbits_to_netmask(ofpfw >> OFPFW_NW_DST_SHIFT);
 
@@ -151,7 +152,7 @@ ofputil_cls_rule_from_match(const struct ofp_match *match,
     rule->flow.tp_dst = match->tp_dst;
     memcpy(rule->flow.dl_src, match->dl_src, ETH_ADDR_LEN);
     memcpy(rule->flow.dl_dst, match->dl_dst, ETH_ADDR_LEN);
-    rule->flow.nw_tos = match->nw_tos;
+    rule->flow.tos_frag = match->nw_tos & IP_DSCP_MASK;
     rule->flow.nw_proto = match->nw_proto;
 
     /* Translate VLANs. */
@@ -190,7 +191,7 @@ ofputil_cls_rule_to_match(const struct cls_rule *rule, struct ofp_match *match)
     ofpfw = (OVS_FORCE uint32_t) (wc->wildcards & WC_INVARIANTS);
     ofpfw |= ofputil_netmask_to_wcbits(wc->nw_src_mask) << OFPFW_NW_SRC_SHIFT;
     ofpfw |= ofputil_netmask_to_wcbits(wc->nw_dst_mask) << OFPFW_NW_DST_SHIFT;
-    if (wc->wildcards & FWW_NW_TOS) {
+    if (!(wc->tos_frag_mask & IP_DSCP_MASK)) {
         ofpfw |= OFPFW_NW_TOS;
     }
 
@@ -224,7 +225,7 @@ ofputil_cls_rule_to_match(const struct cls_rule *rule, struct ofp_match *match)
     match->dl_type = ofputil_dl_type_to_openflow(rule->flow.dl_type);
     match->nw_src = rule->flow.nw_src;
     match->nw_dst = rule->flow.nw_dst;
-    match->nw_tos = rule->flow.nw_tos;
+    match->nw_tos = rule->flow.tos_frag & IP_DSCP_MASK;
     match->nw_proto = rule->flow.nw_proto;
     match->tp_src = rule->flow.tp_src;
     match->tp_dst = rule->flow.tp_dst;
@@ -790,7 +791,7 @@ ofputil_min_flow_format(const struct cls_rule *rule)
 {
     const struct flow_wildcards *wc = &rule->wc;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 2);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 3);
 
     /* Only NXM supports separately wildcards the Ethernet multicast bit. */
     if (!(wc->wildcards & FWW_DL_DST) != !(wc->wildcards & FWW_ETH_MCAST)) {
@@ -818,6 +819,11 @@ ofputil_min_flow_format(const struct cls_rule *rule)
         return NXFF_NXM;
     }
 
+    /* Only NXM supports matching fragments. */
+    if (wc->tos_frag_mask & FLOW_FRAG_MASK) {
+        return NXFF_NXM;
+    }
+
     /* Other formats can express this rule. */
     return NXFF_OPENFLOW10;
 }
@@ -1932,6 +1938,36 @@ make_echo_reply(const struct ofp_header *rq)
     return out;
 }
 
+const char *
+ofputil_frag_handling_to_string(enum ofp_config_flags flags)
+{   /* Names the fragment handling mode in 'flags & OFPC_FRAG_MASK'. */
+    switch (flags & OFPC_FRAG_MASK) {
+    case OFPC_FRAG_NORMAL:   return "normal";
+    case OFPC_FRAG_DROP:     return "drop";
+    case OFPC_FRAG_REASM:    return "reassemble";
+    case OFPC_FRAG_NX_MATCH: return "nx-match";
+    }
+
+    NOT_REACHED();      /* Assumes the cases above cover the whole mask. */
+}
+
+bool
+ofputil_frag_handling_from_string(const char *s, enum ofp_config_flags *flags)
+{   /* Parses a mode name in 's' (any case) into '*flags'. */
+    if (!strcasecmp(s, "normal")) {
+        *flags = OFPC_FRAG_NORMAL;
+    } else if (!strcasecmp(s, "drop")) {
+        *flags = OFPC_FRAG_DROP;
+    } else if (!strcasecmp(s, "reassemble")) {
+        *flags = OFPC_FRAG_REASM;
+    } else if (!strcasecmp(s, "nx-match")) {
+        *flags = OFPC_FRAG_NX_MATCH;
+    } else {
+        return false;           /* Unknown name; '*flags' is left untouched. */
+    }
+    return true;
+}
+
 /* Checks that 'port' is a valid output port for the OFPAT_OUTPUT action, given
  * that the switch will never have more than 'max_ports' ports.  Returns 0 if
  * 'port' is valid, otherwise an ofp_mkerr() return code. */
@@ -2401,7 +2437,7 @@ ofputil_normalize_rule(struct cls_rule *rule, enum nx_flow_format flow_format)
         MAY_NW_ADDR     = 1 << 0, /* nw_src, nw_dst */
         MAY_TP_ADDR     = 1 << 1, /* tp_src, tp_dst */
         MAY_NW_PROTO    = 1 << 2, /* nw_proto */
-        MAY_NW_TOS      = 1 << 3, /* nw_tos */
+        MAY_TOS_FRAG    = 1 << 3, /* tos_frag */
         MAY_ARP_SHA     = 1 << 4, /* arp_sha */
         MAY_ARP_THA     = 1 << 5, /* arp_tha */
         MAY_IPV6_ADDR   = 1 << 6, /* ipv6_src, ipv6_dst */
@@ -2412,7 +2448,7 @@ ofputil_normalize_rule(struct cls_rule *rule, enum nx_flow_format flow_format)
 
     /* Figure out what fields may be matched. */
     if (rule->flow.dl_type == htons(ETH_TYPE_IP)) {
-        may_match = MAY_NW_PROTO | MAY_NW_TOS | MAY_NW_ADDR;
+        may_match = MAY_NW_PROTO | MAY_TOS_FRAG | MAY_NW_ADDR;
         if (rule->flow.nw_proto == IPPROTO_TCP ||
             rule->flow.nw_proto == IPPROTO_UDP ||
             rule->flow.nw_proto == IPPROTO_ICMP) {
@@ -2420,7 +2456,7 @@ ofputil_normalize_rule(struct cls_rule *rule, enum nx_flow_format flow_format)
         }
     } else if (rule->flow.dl_type == htons(ETH_TYPE_IPV6)
                && flow_format == NXFF_NXM) {
-        may_match = MAY_NW_PROTO | MAY_NW_TOS | MAY_IPV6_ADDR;
+        may_match = MAY_NW_PROTO | MAY_TOS_FRAG | MAY_IPV6_ADDR;
         if (rule->flow.nw_proto == IPPROTO_TCP ||
             rule->flow.nw_proto == IPPROTO_UDP) {
             may_match |= MAY_TP_ADDR;
@@ -2452,8 +2488,8 @@ ofputil_normalize_rule(struct cls_rule *rule, enum nx_flow_format flow_format)
     if (!(may_match & MAY_NW_PROTO)) {
         wc.wildcards |= FWW_NW_PROTO;
     }
-    if (!(may_match & MAY_NW_TOS)) {
-        wc.wildcards |= FWW_NW_TOS;
+    if (!(may_match & MAY_TOS_FRAG)) {
+        wc.tos_frag_mask = 0;
     }
     if (!(may_match & MAY_ARP_SHA)) {
         wc.wildcards |= FWW_ARP_SHA;
diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index ecd77cc..5af9d2b 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -277,6 +277,9 @@ struct ofpbuf *make_unbuffered_packet_out(const struct ofpbuf *packet,
                                           uint16_t in_port, uint16_t out_port);
 struct ofpbuf *make_echo_request(void);
 struct ofpbuf *make_echo_reply(const struct ofp_header *rq);
+
+const char *ofputil_frag_handling_to_string(enum ofp_config_flags);
+bool ofputil_frag_handling_from_string(const char *, enum ofp_config_flags *);
 
 /* Actions. */
 
diff --git a/ofproto/netflow.c b/ofproto/netflow.c
index f0af436..9d9ef19 100644
--- a/ofproto/netflow.c
+++ b/ofproto/netflow.c
@@ -160,7 +160,7 @@ gen_netflow_rec(struct netflow *nf, struct netflow_flow *nf_flow,
     }
     nf_rec->tcp_flags = nf_flow->tcp_flags;
     nf_rec->ip_proto = expired->flow.nw_proto;
-    nf_rec->ip_tos = expired->flow.nw_tos;
+    nf_rec->ip_tos = expired->flow.tos_frag & IP_DSCP_MASK;
 
     /* NetFlow messages are limited to 30 records. */
     if (ntohs(nf_hdr->count) >= 30) {
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 0eeda7e..536a213 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -2946,13 +2946,26 @@ static struct rule_dpif *
 rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
                  uint8_t table_id)
 {
+    struct cls_rule *cls_rule;
+    struct classifier *cls;
+
     if (table_id >= N_TABLES) {
         return NULL;
     }
 
-    return rule_dpif_cast(rule_from_cls_rule(
-                              classifier_lookup(&ofproto->up.tables[table_id],
-                                                flow)));
+    cls = &ofproto->up.tables[table_id];
+    if ((flow->tos_frag & FLOW_FRAG_MASK) == FLOW_FRAG_ANY
+        && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
+        /* For OFPC_FRAG_NORMAL frag_handling, we must pretend that transport
+         * ports are unavailable. */
+        struct flow ofpc_normal_flow = *flow;   /* Leave caller's flow as is. */
+        ofpc_normal_flow.tp_src = htons(0);
+        ofpc_normal_flow.tp_dst = htons(0);
+        cls_rule = classifier_lookup(cls, &ofpc_normal_flow);
+    } else {
+        cls_rule = classifier_lookup(cls, flow);
+    }
+    return rule_dpif_cast(rule_from_cls_rule(cls_rule));
 }
 
 static void
@@ -3320,9 +3333,10 @@ commit_odp_actions(struct action_xlate_ctx *ctx)
         base->nw_dst = flow->nw_dst;
     }
 
-    if (base->nw_tos != flow->nw_tos) {
-        nl_msg_put_u8(odp_actions, OVS_ACTION_ATTR_SET_NW_TOS, flow->nw_tos);
-        base->nw_tos = flow->nw_tos;
+    if (base->tos_frag != flow->tos_frag) {
+        nl_msg_put_u8(odp_actions, OVS_ACTION_ATTR_SET_NW_TOS,
+                      flow->tos_frag & IP_DSCP_MASK);
+        base->tos_frag = flow->tos_frag;
     }
 
     commit_vlan_tci(ctx, flow->vlan_tci);
@@ -3752,7 +3766,8 @@ do_xlate_actions(const union ofp_action *in, size_t n_in,
             break;
 
         case OFPUTIL_OFPAT_SET_NW_TOS:
-            ctx->flow.nw_tos = ia->nw_tos.nw_tos & IP_DSCP_MASK;
+            ctx->flow.tos_frag &= ~IP_DSCP_MASK;
+            ctx->flow.tos_frag |= ia->nw_tos.nw_tos & IP_DSCP_MASK;
             break;
 
         case OFPUTIL_OFPAT_SET_TP_SRC:
@@ -3870,6 +3885,27 @@ xlate_actions(struct action_xlate_ctx *ctx,
 
     ctx->odp_actions = ofpbuf_new(512);
     ofpbuf_reserve(ctx->odp_actions, NL_A_U32_SIZE);
+
+    if (ctx->flow.tos_frag & FLOW_FRAG_ANY) {
+        switch (ctx->ofproto->up.frag_handling) {
+        case OFPC_FRAG_NORMAL:
+            /* We must pretend that transport ports are unavailable. */
+            ctx->flow.tp_src = htons(0);
+            ctx->flow.tp_dst = htons(0);
+            break;
+
+        case OFPC_FRAG_DROP:
+            return ctx->odp_actions;
+
+        case OFPC_FRAG_REASM:
+            NOT_REACHED();
+
+        case OFPC_FRAG_NX_MATCH:
+            /* Nothing to do. */
+            break;
+        }
+    }
+
     ctx->tags = 0;
     ctx->may_set_up_flow = true;
     ctx->has_learn = false;
@@ -4611,21 +4647,17 @@ rule_invalidate(const struct rule_dpif *rule)
 }
 
 static bool
-get_drop_frags(struct ofproto *ofproto_)
-{
-    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
-    bool drop_frags;
-
-    dpif_get_drop_frags(ofproto->dpif, &drop_frags);
-    return drop_frags;
-}
-
-static void
-set_drop_frags(struct ofproto *ofproto_, bool drop_frags)
+set_frag_handling(struct ofproto *ofproto_,
+                  enum ofp_config_flags frag_handling)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
 
-    dpif_set_drop_frags(ofproto->dpif, drop_frags);
+    if (frag_handling != OFPC_FRAG_REASM) {     /* Any mode but reassembly. */
+        ofproto->need_revalidate = true;  /* Presumably forces a flow recheck. */
+        return true;
+    } else {
+        return false;           /* Reassembly is reported as unsupported. */
+    }
 }
 
 static int
@@ -4965,8 +4997,7 @@ const struct ofproto_class ofproto_dpif_class = {
     rule_get_stats,
     rule_execute,
     rule_modify_actions,
-    get_drop_frags,
-    set_drop_frags,
+    set_frag_handling,
     packet_out,
     set_netflow,
     get_netflow_ids,
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index f596abc..38dbd2d 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -51,6 +51,7 @@ struct ofproto {
     char *sw_desc;              /* Software version. */
     char *serial_desc;          /* Serial number. */
     char *dp_desc;              /* Datapath description. */
+    enum ofp_config_flags frag_handling; /* One of OFPC_FRAG_*. */
 
     /* Datapath. */
     struct hmap ports;          /* Contains "struct ofport"s. */
@@ -81,7 +82,6 @@ struct ofport *ofproto_get_port(const struct ofproto *, uint16_t ofp_port);
          (CLS) < &(OFPROTO)->tables[(OFPROTO)->n_tables];   \
          (CLS)++)
 
-
 /* An OpenFlow port within a "struct ofproto".
  *
  * With few exceptions, ofproto implementations may look at these fields but
@@ -807,14 +807,36 @@ struct ofproto_class {
      * rule. */
     void (*rule_modify_actions)(struct rule *rule);
 
-    /* These functions implement the OpenFlow IP fragment handling policy.  By
-     * default ('drop_frags' == false), an OpenFlow switch should treat IP
-     * fragments the same way as other packets (although TCP and UDP port
-     * numbers cannot be determined).  With 'drop_frags' == true, the switch
-     * should drop all IP fragments without passing them through the flow
-     * table. */
-    bool (*get_drop_frags)(struct ofproto *ofproto);
-    void (*set_drop_frags)(struct ofproto *ofproto, bool drop_frags);
+    /* Changes the OpenFlow IP fragment handling policy to 'frag_handling',
+     * which takes one of the following values, with the corresponding
+     * meanings:
+     *
+     *  - OFPC_FRAG_NORMAL: The switch should treat IP fragments the same way
+     *    as other packets, omitting TCP and UDP port numbers (always setting
+     *    them to 0).
+     *
+     *  - OFPC_FRAG_DROP: The switch should drop all IP fragments without
+     *    passing them through the flow table.
+     *
+     *  - OFPC_FRAG_REASM: The switch should reassemble IP fragments before
+     *    passing packets through the flow table.
+     *
+     *  - OFPC_FRAG_NX_MATCH (a Nicira extension): Similar to OFPC_FRAG_NORMAL,
+     *    except that TCP and UDP port numbers should be included in fragments
+     *    with offset 0.
+     *
+     * Implementations are not required to support every mode.
+     * OFPC_FRAG_NORMAL is the default mode when an ofproto is created.
+     *
+     * At the time of the call to ->set_frag_handling(), the current mode is
+     * available in 'ofproto->frag_handling'.  ->set_frag_handling() returns
+     * true if the requested mode was set, false if it is not supported.
+     *
+     * Upon successful return, the caller changes 'ofproto->frag_handling' to
+     * reflect the new mode.
+     */
+    bool (*set_frag_handling)(struct ofproto *ofproto,
+                              enum ofp_config_flags frag_handling);
 
     /* Implements the OpenFlow OFPT_PACKET_OUT command.  The datapath should
      * execute the 'n_actions' in the 'actions' array on 'packet'.
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 1cc1e4e..d64901e 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -327,6 +327,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
     ofproto->sw_desc = xstrdup(DEFAULT_SW_DESC);
     ofproto->serial_desc = xstrdup(DEFAULT_SERIAL_DESC);
     ofproto->dp_desc = xstrdup(DEFAULT_DP_DESC);
+    ofproto->frag_handling = OFPC_FRAG_NORMAL;
     hmap_init(&ofproto->ports);
     shash_init(&ofproto->port_by_name);
     ofproto->tables = NULL;
@@ -1580,18 +1581,12 @@ static int
 handle_get_config_request(struct ofconn *ofconn, const struct ofp_header *oh)
 {
     struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
-    struct ofpbuf *buf;
     struct ofp_switch_config *osc;
-    uint16_t flags;
-    bool drop_frags;
-
-    /* Figure out flags. */
-    drop_frags = ofproto->ofproto_class->get_drop_frags(ofproto);
-    flags = drop_frags ? OFPC_FRAG_DROP : OFPC_FRAG_NORMAL;
+    struct ofpbuf *buf;
 
     /* Send reply. */
     osc = make_openflow_xid(sizeof *osc, OFPT_GET_CONFIG_REPLY, oh->xid, &buf);
-    osc->flags = htons(flags);
+    osc->flags = htons(ofproto->frag_handling);
     osc->miss_send_len = htons(ofconn_get_miss_send_len(ofconn));
     ofconn_send_reply(ofconn, buf);
 
@@ -1604,19 +1599,20 @@ handle_set_config(struct ofconn *ofconn, const struct ofp_switch_config *osc)
     struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
     uint16_t flags = ntohs(osc->flags);
 
-    if (ofconn_get_type(ofconn) == OFCONN_PRIMARY
-        && ofconn_get_role(ofconn) != NX_ROLE_SLAVE) {
-        switch (flags & OFPC_FRAG_MASK) {
-        case OFPC_FRAG_NORMAL:
-            ofproto->ofproto_class->set_drop_frags(ofproto, false);
-            break;
-        case OFPC_FRAG_DROP:
-            ofproto->ofproto_class->set_drop_frags(ofproto, true);
-            break;
-        default:
-            VLOG_WARN_RL(&rl, "requested bad fragment mode (flags=%"PRIx16")",
-                         osc->flags);
-            break;
+    if (ofconn_get_type(ofconn) != OFCONN_PRIMARY
+        || ofconn_get_role(ofconn) != NX_ROLE_SLAVE) {
+        enum ofp_config_flags cur = ofproto->frag_handling;
+        enum ofp_config_flags next = flags & OFPC_FRAG_MASK;
+
+        assert((cur & OFPC_FRAG_MASK) == cur);
+        if (cur != next) {
+            if (ofproto->ofproto_class->set_frag_handling(ofproto, next)) {
+                ofproto->frag_handling = next;
+            } else {
+                VLOG_WARN_RL(&rl, "%s: unsupported fragment handling mode %s",
+                             ofproto->name,
+                             ofputil_frag_handling_to_string(next));
+            }
         }
     }
 
diff --git a/tests/flowgen.pl b/tests/flowgen.pl
index d397515..95c8f48 100755
--- a/tests/flowgen.pl
+++ b/tests/flowgen.pl
@@ -1,6 +1,6 @@
 #! /usr/bin/perl
 
-# Copyright (c) 2009, 2010 Nicira Networks.
+# Copyright (c) 2009, 2010, 2011 Nicira Networks.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -98,7 +98,7 @@ sub output {
         } else {
             die;
         }
-        if ($attrs{IP_FRAGMENT} ne 'no') {
+        if ($attrs{IP_FRAGMENT} ne 'no' && $attrs{IP_FRAGMENT} ne 'first') {
             $flow{TP_SRC} = $flow{TP_DST} = 0;
         }
     } elsif ($attrs{DL_TYPE} eq 'non-ip') {
@@ -158,14 +158,14 @@ sub output {
 
             if ($attrs{TP_PROTO} =~ '^TCP') {
                 my $tcp = pack('nnNNnnnn',
-                               $flow{TP_SRC},           # source port
-                               $flow{TP_DST},           # dest port
-                               87123455,                # seqno
-                               712378912,               # ackno
+                               $flow{TP_SRC},     # source port
+                               $flow{TP_DST},     # dest port
+                               87123455,          # seqno
+                               712378912,         # ackno
                                (5 << 12) | 0x02 | 0x10, # hdrlen, SYN, ACK
                                5823,                    # window size
                                18923,                   # checksum
-                               12893);                  # urgent pointer
+                               12893); # urgent pointer
                 if ($attrs{TP_PROTO} eq 'TCP+options') {
                     substr($tcp, 12, 2) = pack('n', (6 << 12) | 0x02 | 0x10);
                     $tcp .= pack('CCn', 2, 4, 1975); # MSS option
@@ -179,17 +179,16 @@ sub output {
                 $ip .= $udp;
             } elsif ($attrs{TP_PROTO} eq 'ICMP') {
                 $ip .= pack('CCnnn',
-                            8,    # echo request
-                            0,    # code
-                            0,    # checksum
-                            736,  # identifier
-                            931); # sequence number
+                            8,        # echo request
+                            0,        # code
+                            0,        # checksum
+                            736,      # identifier
+                            931);     # sequence number
             } elsif ($attrs{TP_PROTO} eq 'other') {
                 $ip .= 'other header';
             } else {
                 die;
             }
-
             substr($ip, 2, 2) = pack('n', length($ip));
             $packet .= $ip;
         }
diff --git a/tests/odp.at b/tests/odp.at
index 21aa897..65a9fb9 100644
--- a/tests/odp.at
+++ b/tests/odp.at
@@ -2,18 +2,22 @@ AT_SETUP([OVS datapath parsing and formatting - valid forms])
 AT_DATA([odp-base.txt], [dnl
 in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15)
 in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x1234)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0),tcp(src=80,dst=8080)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0),udp(src=81,dst=6632)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0),icmp(type=1,code=2)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=6,tos=0),tcp(src=80,dst=8080)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=17,tos=0),udp(src=6630,dst=22)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=1,code=2)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=135,code=0),nd(target=::3)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=no)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=first)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=128,frag=later)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0,frag=no),tcp(src=80,dst=8080)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,frag=no),udp(src=81,dst=6632)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0,frag=no),icmp(type=1,code=2)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=no)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=first)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=10,tos=112,frag=later)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=6,tos=0,frag=no),tcp(src=80,dst=8080)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=17,tos=0,frag=no),udp(src=6630,dst=22)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=1,code=2)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=135,code=0),nd(target=::3)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,proto=58,tos=0,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
 in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18)
 ])
 
@@ -31,7 +35,15 @@ in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp
  echo
  echo '# Valid forms with tun_id and VLAN headers.'
  sed 's/^/tun_id(0xfedcba9876543210),/
-s/eth([[^)]]*)/&,vlan(vid=99,pcp=7)/' odp-base.txt) > odp.txt
+s/eth([[^)]]*)/&,vlan(vid=99,pcp=7)/' odp-base.txt
+
+ echo
+ echo '# Valid forms with IP first fragment.'
+sed -n 's/,frag=no),/,frag=first),/p' odp-base.txt
+
+ echo
+ echo '# Valid forms with IP later fragment.'
+sed -n 's/,frag=no),.*/,frag=later)/p' odp-base.txt) > odp.txt
 AT_CAPTURE_FILE([odp.txt])
 AT_CHECK_UNQUOTED([test-odp < odp.txt], [0], [`cat odp.txt`
 ])
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index b5ca08c..0c86f85 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -11,7 +11,7 @@ table=1 in_port=2 priority=1500 icmp actions=output(17),resubmit(,2)
 table=1 in_port=3 priority=1500 icmp actions=output(14),resubmit(,2)
 ])
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: 10,11,12,13,14,15,16,17,18,19,20,21
 ])
@@ -36,7 +36,7 @@ in_port=10,reg1=0xdeadbeef actions=output:21
 in_port=11,reg2=0xeef22dea actions=output:22
 ])
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: 20,21,22
 ])
@@ -55,7 +55,7 @@ in_port=6 actions=output:NXM_NX_REG0[[0..15]],output:NXM_NX_REG0[[16..31]]
 in_port=7 actions=load:0x110000ff->NXM_NX_REG0[[]],output:NXM_NX_REG0[[]]
 ])
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: 9,55,10,55,66,11,77,88
 ])
@@ -73,7 +73,7 @@ in_port=4 actions=set_tunnel:4,set_tunnel:3,output:4
 in_port=5 actions=set_tunnel:5
 ])
 AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'tun_id(0x1),in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 'tun_id(0x1),in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,frag=no),icmp(type=8,code=0)'], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: set_tunnel(0x1),1,2,set_tunnel(0x3),3,4
 ])
@@ -225,3 +225,44 @@ done
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - fragment handling])
+OFPROTO_START
+AT_DATA([flows.txt], [dnl
+priority=75 tcp ip_frag=no    tp_dst=80 actions=output:1
+priority=75 tcp ip_frag=first tp_dst=80 actions=output:2
+priority=75 tcp ip_frag=later tp_dst=80 actions=output:3
+priority=50 tcp ip_frag=no              actions=output:4
+priority=50 tcp ip_frag=first           actions=output:5
+priority=50 tcp ip_frag=later           actions=output:6
+])
+AT_CHECK([ovs-ofctl replace-flows br0 flows.txt])
+
+base_flow="in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0"
+no_flow="$base_flow,frag=no),tcp(src=12345,dst=80)"
+first_flow="$base_flow,frag=first),tcp(src=12345,dst=80)"
+later_flow="$base_flow,frag=later)"
+
+    # mode    no  first  later
+for tuple in \
+    'normal    1     5      6' \
+    'drop      1  drop   drop' \
+    'nx-match  1     2      6'
+do
+  set $tuple
+  mode=$1
+  no=$2
+  first=$3
+  later=$4
+
+  AT_CHECK([ovs-ofctl set-frags br0 $mode])
+  for type in no first later; do
+    eval flow=\$${type}_flow exp_output=\$$type
+    AT_CHECK([ovs-appctl -t test-openflowd ofproto/trace br0 "$flow"],
+             [0], [stdout])
+    AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: $exp_output
+])
+  done
+done
+OFPROTO_STOP
+AT_CLEANUP
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index 9dcd249..f3feff3 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -301,6 +301,32 @@ NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGE
 NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3b) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4)
 NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(88) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4)
 
+# IPv4 fragments.
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(01)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(02)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(03)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/03)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/fd)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/02)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(01/01)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(02/02)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(03/03)
+NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(f3)
+
+# IPv6 fragments.
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(01)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(02)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(03)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/03)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/01)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/02)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(01/01)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(02/02)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(03/03)
+NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(f3)
+
 # Tunnel ID.
 NXM_NX_TUN_ID(00000000abcdef01)
 NXM_NX_TUN_ID_W(84200000abcdef01/84200000FFFFFFFF)
@@ -453,6 +479,32 @@ nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_
 nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_PREREQ)
 nx_pull_match() returned error 44010104 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_PREREQ)
 
+# IPv4 fragments.
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(01)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(02)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(00/01)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(00/02)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(01/01)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(02/02)
+NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03)
+nx_pull_match() returned error 44010102 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_VALUE)
+
+# IPv6 fragments.
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(01)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(02)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(00/01)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(00/02)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(01/01)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(02/02)
+NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03)
+nx_pull_match() returned error 44010102 (type OFPET_BAD_REQUEST, code NXBRC_NXM_BAD_VALUE)
+
 # Tunnel ID.
 NXM_NX_TUN_ID(00000000abcdef01)
 NXM_NX_TUN_ID_W(84200000abcdef01/84200000ffffffff)
diff --git a/tests/test-classifier.c b/tests/test-classifier.c
index 5f5d797..0e2b13f 100644
--- a/tests/test-classifier.c
+++ b/tests/test-classifier.c
@@ -55,7 +55,7 @@
     CLS_FIELD(FWW_DL_SRC,                 dl_src,      DL_SRC)      \
     CLS_FIELD(FWW_DL_DST | FWW_ETH_MCAST, dl_dst,      DL_DST)      \
     CLS_FIELD(FWW_NW_PROTO,               nw_proto,    NW_PROTO)    \
-    CLS_FIELD(FWW_NW_TOS,                 nw_tos,      NW_TOS)
+    CLS_FIELD(0,                          tos_frag,    TOS_FRAG)
 
 /* Field indexes.
  *
@@ -203,6 +203,9 @@ match(const struct cls_rule *wild, const struct flow *fixed)
                    & wild->wc.vlan_tci_mask);
         } else if (f_idx == CLS_F_IDX_TUN_ID) {
             eq = !((fixed->tun_id ^ wild->flow.tun_id) & wild->wc.tun_id_mask);
+        } else if (f_idx == CLS_F_IDX_TOS_FRAG) {
+            eq = !((fixed->tos_frag ^ wild->flow.tos_frag)
+                   & wild->wc.tos_frag_mask);
         } else {
             NOT_REACHED();
         }
@@ -263,7 +266,7 @@ static uint8_t dl_src_values[][6] = { { 0x00, 0x02, 0xe3, 0x0f, 0x80, 0xa4 },
 static uint8_t dl_dst_values[][6] = { { 0x4a, 0x27, 0x71, 0xae, 0x64, 0xc1 },
                                       { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } };
 static uint8_t nw_proto_values[] = { IPPROTO_TCP, IPPROTO_ICMP };
-static uint8_t nw_tos_values[] = { 49, 0 };
+static uint8_t tos_frag_values[] = { 48, 0 };
 
 static void *values[CLS_N_FIELDS][2];
 
@@ -297,8 +300,8 @@ init_values(void)
     values[CLS_F_IDX_NW_PROTO][0] = &nw_proto_values[0];
     values[CLS_F_IDX_NW_PROTO][1] = &nw_proto_values[1];
 
-    values[CLS_F_IDX_NW_TOS][0] = &nw_tos_values[0];
-    values[CLS_F_IDX_NW_TOS][1] = &nw_tos_values[1];
+    values[CLS_F_IDX_TOS_FRAG][0] = &tos_frag_values[0];
+    values[CLS_F_IDX_TOS_FRAG][1] = &tos_frag_values[1];
 
     values[CLS_F_IDX_TP_SRC][0] = &tp_src_values[0];
     values[CLS_F_IDX_TP_SRC][1] = &tp_src_values[1];
@@ -318,7 +321,7 @@ init_values(void)
 #define N_DL_SRC_VALUES ARRAY_SIZE(dl_src_values)
 #define N_DL_DST_VALUES ARRAY_SIZE(dl_dst_values)
 #define N_NW_PROTO_VALUES ARRAY_SIZE(nw_proto_values)
-#define N_NW_TOS_VALUES ARRAY_SIZE(nw_tos_values)
+#define N_TOS_FRAG_VALUES ARRAY_SIZE(tos_frag_values)
 
 #define N_FLOW_VALUES (N_NW_SRC_VALUES *        \
                        N_NW_DST_VALUES *        \
@@ -331,7 +334,7 @@ init_values(void)
                        N_DL_SRC_VALUES *        \
                        N_DL_DST_VALUES *        \
                        N_NW_PROTO_VALUES *      \
-                       N_NW_TOS_VALUES)
+                       N_TOS_FRAG_VALUES)
 
 static unsigned int
 get_value(unsigned int *x, unsigned n_values)
@@ -367,7 +370,7 @@ compare_classifiers(struct classifier *cls, struct tcls *tcls)
         memcpy(flow.dl_dst, dl_dst_values[get_value(&x, N_DL_DST_VALUES)],
                ETH_ADDR_LEN);
         flow.nw_proto = nw_proto_values[get_value(&x, N_NW_PROTO_VALUES)];
-        flow.nw_tos = nw_tos_values[get_value(&x, N_NW_TOS_VALUES)];
+        flow.tos_frag = tos_frag_values[get_value(&x, N_TOS_FRAG_VALUES)];
 
         cr0 = classifier_lookup(cls, &flow);
         cr1 = tcls_lookup(tcls, &flow);
@@ -465,6 +468,8 @@ make_rule(int wc_fields, unsigned int priority, int value_pat)
             rule->cls_rule.wc.vlan_tci_mask = htons(UINT16_MAX);
         } else if (f_idx == CLS_F_IDX_TUN_ID) {
             rule->cls_rule.wc.tun_id_mask = htonll(UINT64_MAX);
+        } else if (f_idx == CLS_F_IDX_TOS_FRAG) {
+            rule->cls_rule.wc.tos_frag_mask = UINT8_MAX;
         } else {
             NOT_REACHED();
         }
diff --git a/tests/test-flows.c b/tests/test-flows.c
index 559af3a..57157c9 100644
--- a/tests/test-flows.c
+++ b/tests/test-flows.c
@@ -80,8 +80,11 @@ main(int argc OVS_UNUSED, char *argv[])
             printf("Packet:\n");
             ofp_print_packet(stdout, packet->data, packet->size, packet->size);
             ovs_hex_dump(stdout, packet->data, packet->size, 0, true);
+            cls_rule_print(&rule);
             printf("Expected flow:\n%s\n", exp_s);
             printf("Actually extracted flow:\n%s\n", got_s);
+            ovs_hex_dump(stdout, &expected_match, sizeof expected_match, 0, false);
+            ovs_hex_dump(stdout, &extracted_match, sizeof extracted_match, 0, false);
             printf("\n");
             free(exp_s);
             free(got_s);
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
index 4d0d3c2..d78fb31 100644
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -371,12 +371,9 @@ show_dpif(struct dpif *dpif)
 
     printf("%s:\n", dpif_name(dpif));
     if (!dpif_get_dp_stats(dpif, &stats)) {
-        printf("\tlookups: frags:%"PRIu64, stats.n_frags);
-        printf(" hit:%"PRIu64, stats.n_hit);
-        printf(" missed:%"PRIu64, stats.n_missed);
-        printf(" lost:%"PRIu64"\n", stats.n_lost);
-
-        printf("\tflows: %"PRIu64"\n", stats.n_flows);
+        printf("\tlookups: hit:%"PRIu64" missed:%"PRIu64" lost:%"PRIu64"\n"
+               "\tflows: %"PRIu64"\n",
+               stats.n_hit, stats.n_missed, stats.n_lost, stats.n_flows);
     }
     DPIF_PORT_FOR_EACH (&dpif_port, &dump, dpif) {
         printf("\tport %u: %s", dpif_port.port_no, dpif_port.name);
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index f2ed8a4..215f8f9 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -92,6 +92,39 @@ spanning tree protocol is not in use.
 .
 .RE
 .
+.IP "\fBget\-frags \fIswitch\fR"
+Prints \fIswitch\fR's fragment handling mode.  See \fBset\-frags\fR,
+below, for a description of each fragment handling mode.
+.IP
+The \fBshow\fR command also prints the fragment handling mode among
+its other output.
+.
+.IP "\fBset\-frags \fIswitch frag_mode\fR"
+Configures \fIswitch\fR's treatment of IPv4 and IPv6 fragments.  The
+choices for \fIfrag_mode\fR are:
+.RS
+.IP "\fBnormal\fR"
+Fragments pass through the flow table like non-fragmented packets.
+The TCP ports, UDP ports, and ICMP type and code fields are always set
+to 0, even for fragments where that information would otherwise be
+available (fragments with offset 0).  This is the default fragment
+handling mode for an OpenFlow switch.
+.IP "\fBdrop\fR"
+Fragments are dropped without passing through the flow table.
+.IP "\fBreassemble\fR"
+The switch reassembles fragments into full IP packets before passing
+them through the flow table.  Open vSwitch does not implement this
+fragment handling mode.
+.IP "\fBnx\-match\fR"
+Fragments pass through the flow table like non-fragmented packets.
+The TCP ports, UDP ports, and ICMP type and code fields are available
+for matching for fragments with offset 0, and set to 0 in fragments
+with nonzero offset.  This mode is a Nicira extension.
+.RE
+.IP
+See the description of \fBip_frag\fR, below, for a way to match on
+whether a packet is a fragment and on its fragment offset.
+.
 .TP
 \fBdump\-flows \fIswitch \fR[\fIflows\fR]
 Prints to the console all flow entries in \fIswitch\fR's
@@ -476,6 +509,27 @@ Match packets with no 802.1Q header or tagged with VLAN 0 and priority
 Some of these matching possibilities can also be achieved with
 \fBdl_vlan\fR and \fBdl_vlan_pcp\fR.
 .
+.IP \fBip_frag=\fIfrag_type\fR
+When \fBdl_type\fR specifies IP or IPv6, \fIfrag_type\fR
+specifies what kind of IP fragments or non-fragments to match.  The
+following values of \fIfrag_type\fR are supported:
+.RS
+.IP "\fBno\fR"
+Matches only non-fragmented packets.
+.IP "\fByes\fR"
+Matches all fragments.
+.IP "\fBfirst\fR"
+Matches only fragments with offset 0.
+.IP "\fBlater\fR"
+Matches only fragments with nonzero offset.
+.IP "\fBnot_later\fR"
+Matches non-fragmented packets and fragments with zero offset.
+.RE
+.IP
+The \fBip_frag\fR match type is likely to be most useful in
+\fBnx\-match\fR mode.  See the description of the \fBset\-frags\fR
+command, above, for more details.
+.
 .IP \fBarp_sha=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR
 .IQ \fBarp_tha=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR
 When \fBdl_type\fR specifies ARP, \fBarp_sha\fR and \fBarp_tha\fR match
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index 164d083..ce9723b 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -172,6 +172,8 @@ usage(void)
            "  dump-desc SWITCH            print switch description\n"
            "  dump-tables SWITCH          print table stats\n"
            "  mod-port SWITCH IFACE ACT   modify port behavior\n"
+           "  get-frags SWITCH            print fragment handling behavior\n"
+           "  set-frags SWITCH FRAG_MODE  set fragment handling behavior\n"
            "  dump-ports SWITCH [PORT]    print port statistics\n"
            "  dump-flows SWITCH           print all flow entries\n"
            "  dump-flows SWITCH FLOW      print matching FLOWs\n"
@@ -351,7 +353,9 @@ dump_trivial_stats_transaction(const char *vconn_name, uint8_t stats_type)
 
 /* Sends 'request', which should be a request that only has a reply if an error
  * occurs, and waits for it to succeed or fail.  If an error does occur, prints
- * it and exits with an error. */
+ * it and exits with an error.
+ *
+ * Destroys all of the 'requests'. */
 static void
 transact_multiple_noreply(struct vconn *vconn, struct list *requests)
 {
@@ -372,7 +376,9 @@ transact_multiple_noreply(struct vconn *vconn, struct list *requests)
 
 /* Sends 'request', which should be a request that only has a reply if an error
  * occurs, and waits for it to succeed or fail.  If an error does occur, prints
- * it and exits with an error. */
+ * it and exits with an error.
+ *
+ * Destroys 'request'. */
 static void
 transact_noreply(struct vconn *vconn, struct ofpbuf *request)
 {
@@ -384,6 +390,44 @@ transact_noreply(struct vconn *vconn, struct ofpbuf *request)
 }
 
 static void
+fetch_switch_config(struct vconn *vconn, struct ofp_switch_config *config_)
+{
+    struct ofp_switch_config *config;
+    struct ofp_header *header;
+    struct ofpbuf *request;
+    struct ofpbuf *reply;
+
+    make_openflow(sizeof(struct ofp_header), OFPT_GET_CONFIG_REQUEST,
+                  &request);
+    run(vconn_transact(vconn, request, &reply),
+        "talking to %s", vconn_get_name(vconn));
+
+    header = reply->data;
+    if (header->type != OFPT_GET_CONFIG_REPLY ||
+        header->length != htons(sizeof *config)) {
+        ovs_fatal(0, "%s: bad reply to config request", vconn_get_name(vconn));
+    }
+    config = reply->data;
+    *config_ = *config;         /* The caller gets its own copy... */
+    ofpbuf_delete(reply);       /* ...so the reply can be freed, not leaked. */
+}
+
+static void
+set_switch_config(struct vconn *vconn, struct ofp_switch_config *config_)
+{
+    struct ofp_switch_config *config;
+    struct ofp_header save_header;
+    struct ofpbuf *request;
+
+    config = make_openflow(sizeof *config, OFPT_SET_CONFIG, &request);
+    save_header = config->header;  /* Keep the new message's header. */
+    *config = *config_;            /* Whole-struct copy, header included... */
+    config->header = save_header;  /* ...so put the new message's own back. */
+
+    transact_noreply(vconn, request);   /* Destroys 'request'. */
+}
+
+static void
 do_show(int argc OVS_UNUSED, char *argv[])
 {
     dump_trivial_transaction(argv[1], OFPT_FEATURES_REQUEST);
@@ -720,13 +764,11 @@ do_monitor(int argc, char *argv[])
 
     open_vconn(argv[1], &vconn);
     if (argc > 2) {
-        int miss_send_len = atoi(argv[2]);
-        struct ofp_switch_config *osc;
-        struct ofpbuf *buf;
+        struct ofp_switch_config config;
 
-        osc = make_openflow(sizeof *osc, OFPT_SET_CONFIG, &buf);
-        osc->miss_send_len = htons(miss_send_len);
-        transact_noreply(vconn, buf);
+        fetch_switch_config(vconn, &config);
+        config.miss_send_len = htons(atoi(argv[2]));
+        set_switch_config(vconn, &config);
     }
     monitor_vconn(vconn);
 }
@@ -807,6 +849,51 @@ do_mod_port(int argc OVS_UNUSED, char *argv[])
 }
 
 static void
+do_get_frags(int argc OVS_UNUSED, char *argv[])
+{
+    struct ofp_switch_config config;
+    struct vconn *vconn;
+
+    open_vconn(argv[1], &vconn);            /* argv[1] is SWITCH. */
+    fetch_switch_config(vconn, &config);    /* Fields stay in wire order. */
+    puts(ofputil_frag_handling_to_string(ntohs(config.flags)));
+    vconn_close(vconn);
+}
+
+static void
+do_set_frags(int argc OVS_UNUSED, char *argv[])
+{
+    struct ofp_switch_config config;
+    enum ofp_config_flags mode;
+    struct vconn *vconn;
+    ovs_be16 flags;  /* config.flags with the OFPC_FRAG_MASK bits replaced. */
+
+    if (!ofputil_frag_handling_from_string(argv[2], &mode)) {
+        ovs_fatal(0, "%s: unknown fragment handling mode", argv[2]);
+    }
+
+    open_vconn(argv[1], &vconn);            /* argv[1] is SWITCH. */
+    fetch_switch_config(vconn, &config);
+    flags = htons(mode) | (config.flags & htons(~OFPC_FRAG_MASK));
+    if (flags != config.flags) {
+        /* The mode differs from the current one, so set the configuration. */
+        config.flags = flags;
+        set_switch_config(vconn, &config);
+
+        /* Then retrieve the configuration to see if it really took.  OpenFlow
+         * doesn't define error reporting for bad modes, so this is all we can
+         * do. */
+        fetch_switch_config(vconn, &config);
+        if (flags != config.flags) {
+            ovs_fatal(0, "%s: setting fragment handling mode failed (this "
+                      "switch probably doesn't support mode \"%s\")",
+                      argv[1], ofputil_frag_handling_to_string(mode));
+        }
+    }
+    vconn_close(vconn);
+}
+
+static void
 do_ping(int argc, char *argv[])
 {
     size_t max_payload = 65535 - sizeof(struct ofp_header);
@@ -1442,6 +1529,8 @@ static const struct command all_commands[] = {
     { "diff-flows", 2, 2, do_diff_flows },
     { "dump-ports", 1, 2, do_dump_ports },
     { "mod-port", 3, 3, do_mod_port },
+    { "get-frags", 1, 1, do_get_frags },
+    { "set-frags", 2, 2, do_set_frags },
     { "probe", 1, 1, do_probe },
     { "ping", 1, 2, do_ping },
     { "benchmark", 3, 3, do_benchmark },
-- 
1.7.4.4




More information about the dev mailing list