[ovs-dev] [PATCH 1/3] v7: datapath: Add support for tun_key to Open vSwitch datapath

Kyle Mestery (kmestery) kmestery at cisco.com
Tue Oct 16 22:34:30 UTC 2012


I have fixed some of the same issues; let me compare mine to yours. Did you only take my first patch? I have some slight rework on patch #2 as well.

Sent from my iPhone

On Oct 16, 2012, at 1:16 PM, "Pravin B Shelar" <pshelar at nicira.com> wrote:

> This patch was posted by Kyle. I fixed a few issues found in the earlier
> version.
> 
> V7:
> - Fix according to comments posted on V6.
> V6:
> - Address more comments from Jesse.
> V5:
> - Address another round of comments from Jesse.
> V4:
> - Address 2 comments from Jesse:
>  - When processing actions, if OVS_CB(skb)->tun_key is NULL, point it at one
>    on the stack temporarily. This goes away when we remove the ability to set
>    tun_id outside the scope of tun_key.
>  - Move tun_key to the end of struct sw_flow_key.
> V3:
> - Fix issues found during review by Jesse.
> - Add a NEWS entry around tunnel code no longer assuming symmetric input and
>  output tunnel keys.
> 
> V2:
> - Fix blank line addition/removal found by Simon.
> - Fix hex printing output found by Simon.
> 
> --8<--------------------------cut here-------------------------->8--
> 
> This is a first pass at providing a tun_key which can be
> used as the basis for flow-based tunnelling. The
> tun_key includes and replaces the tun_id in both struct
> ovs_skb_cb and struct sw_flow_key.
> 
> This patch allows all existing tun_id behaviour to still work. Existing
> users of tun_id are redirected to tun_key->tun_id to retain compatibility.
> However, when the userspace code is updated to make use of the new tun_key,
> the old behaviour will be deprecated and removed.
> 
> NOTE: With these changes, the tunneling code no longer assumes input and
> output keys are symmetric.  If they are not, PMTUD needs to be disabled
> for tunneling to work.
> 
> Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
> CC: Kyle Mestery <kmestery at cisco.com>
> Cc: Simon Horman <horms at verge.net.au>
> Cc: Jesse Gross <jesse at nicira.com>
> ---
> NEWS                        |    3 +
> datapath/actions.c          |   36 +++++--
> datapath/datapath.c         |    9 +-
> datapath/datapath.h         |    5 +-
> datapath/flow.c             |   86 +++++++++++++++--
> datapath/flow.h             |   12 ++-
> datapath/tunnel.c           |  217 ++++++++++++++++++++++++++-----------------
> datapath/tunnel.h           |   26 ++++--
> datapath/vport-capwap.c     |   92 ++++++++++++++----
> datapath/vport-gre.c        |  128 +++++++++++++++++++------
> datapath/vport.c            |    2 +-
> include/linux/openvswitch.h |   18 +++-
> lib/dpif-netdev.c           |    1 +
> lib/odp-util.c              |   15 ++-
> lib/odp-util.h              |    3 +-
> 15 files changed, 487 insertions(+), 166 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index d841cb3..5ab6a6a 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,5 +1,8 @@
> post-v1.8.0
> ------------------------
> +    - The tunneling code no longer assumes input and output keys are symmetric.
> +      If they are not, PMTUD needs to be disabled for tunneling to work. Note
> +      this only applies to flow-based keys.
>     - FreeBSD is now a supported platform, thanks to code contributions from
>       Gaetano Catalli, Ed Maste, and Giuseppe Lettieri.
>     - ovs-bugtool: New --ovs option to report only OVS related information.
> diff --git a/datapath/actions.c b/datapath/actions.c
> index ec9b595..db85642 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -37,7 +37,8 @@
> #include "vport.h"
> 
> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> -            const struct nlattr *attr, int len, bool keep_skb);
> +                  const struct nlattr *attr, int len,
> +                  struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb);
> 
> static int make_writable(struct sk_buff *skb, int write_len)
> {
> @@ -329,11 +330,14 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>    }
> 
>    return do_execute_actions(dp, skb, nla_data(acts_list),
> -                         nla_len(acts_list), true);
> +                  nla_len(acts_list),
> +                  OVS_CB(skb)->tun_key,
> +                  true);
> }
> 
> static int execute_set_action(struct sk_buff *skb,
> -                 const struct nlattr *nested_attr)
> +                 const struct nlattr *nested_attr,
> +                 struct ovs_key_ipv4_tunnel *tun_key)
> {
>    int err = 0;
> 
> @@ -343,7 +347,21 @@ static int execute_set_action(struct sk_buff *skb,
>        break;
> 
>    case OVS_KEY_ATTR_TUN_ID:
> -        OVS_CB(skb)->tun_id = nla_get_be64(nested_attr);
> +        if (!OVS_CB(skb)->tun_key) {
> +            /* If tun_key is NULL for this skb, assign it to
> +             * a value the caller passed in for action processing
> +             * and output. This can disappear once we drop support
> +             * for setting tun_id outside of tun_key.
> +             */
> +            memset(tun_key, 0, sizeof(struct ovs_key_ipv4_tunnel));
> +            OVS_CB(skb)->tun_key = tun_key;
> +        }
> +
> +        OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr);
> +        break;
> +
> +    case OVS_KEY_ATTR_IPV4_TUNNEL:
> +        OVS_CB(skb)->tun_key = nla_data(nested_attr);
>        break;
> 
>    case OVS_KEY_ATTR_ETHERNET:
> @@ -368,7 +386,8 @@ static int execute_set_action(struct sk_buff *skb,
> 
> /* Execute a list of actions against 'skb'. */
> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> -            const struct nlattr *attr, int len, bool keep_skb)
> +            const struct nlattr *attr, int len,
> +            struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb)
> {
>    /* Every output action needs a separate clone of 'skb', but the common
>     * case is just a single output action, so that doing a clone and
> @@ -407,7 +426,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>            break;
> 
>        case OVS_ACTION_ATTR_SET:
> -            err = execute_set_action(skb, nla_data(a));
> +            err = execute_set_action(skb, nla_data(a), tun_key);
>            break;
> 
>        case OVS_ACTION_ATTR_SAMPLE:
> @@ -458,6 +477,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
>    struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
>    struct loop_counter *loop;
>    int error;
> +    struct ovs_key_ipv4_tunnel tun_key;
> 
>    /* Check whether we've looped too much. */
>    loop = &__get_cpu_var(loop_counters);
> @@ -469,9 +489,9 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
>        goto out_loop;
>    }
> 
> -    OVS_CB(skb)->tun_id = 0;
> +    OVS_CB(skb)->tun_key = NULL;
>    error = do_execute_actions(dp, skb, acts->actions,
> -                     acts->actions_len, false);
> +                     acts->actions_len, &tun_key, false);
> 
>    /* Check whether sub-actions looped too much. */
>    if (unlikely(loop->looping))
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index a6915fb..3f963be 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -587,12 +587,19 @@ static int validate_set(const struct nlattr *a,
> 
>    switch (key_type) {
>    const struct ovs_key_ipv4 *ipv4_key;
> +    const struct ovs_key_ipv4_tunnel *tun_key;
> 
>    case OVS_KEY_ATTR_PRIORITY:
>    case OVS_KEY_ATTR_TUN_ID:
>    case OVS_KEY_ATTR_ETHERNET:
>        break;
> 
> +    case OVS_KEY_ATTR_IPV4_TUNNEL:
> +        tun_key = nla_data(ovs_key);
> +        if (!tun_key->ipv4_dst)
> +            return -EINVAL;
> +        break;
> +
>    case OVS_KEY_ATTR_IPV4:
>        if (flow_key->eth.type != htons(ETH_P_IP))
>            return -EINVAL;
> @@ -785,7 +792,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
> 
>    err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
>                         &flow->key.phy.in_port,
> -                         &flow->key.phy.tun_id,
> +                         &flow->key.tun.tun_key,
>                         a[OVS_PACKET_ATTR_KEY]);
>    if (err)
>        goto err_flow_put;
> diff --git a/datapath/datapath.h b/datapath/datapath.h
> index affbf0e..c5df12d 100644
> --- a/datapath/datapath.h
> +++ b/datapath/datapath.h
> @@ -96,7 +96,8 @@ struct datapath {
> /**
>  * struct ovs_skb_cb - OVS data in skb CB
>  * @flow: The flow associated with this packet.  May be %NULL if no flow.
> - * @tun_id: ID of the tunnel that encapsulated this packet.  It is 0 if the
> + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
> + * packet is not being tunneled.
>  * @ip_summed: Consistently stores L4 checksumming status across different
>  * kernel versions.
>  * @csum_start: Stores the offset from which to start checksumming independent
> @@ -107,7 +108,7 @@ struct datapath {
>  */
> struct ovs_skb_cb {
>    struct sw_flow        *flow;
> -    __be64            tun_id;
> +    struct ovs_key_ipv4_tunnel  *tun_key;
> #ifdef NEED_CSUM_NORMALIZE
>    enum csum_type        ip_summed;
>    u16            csum_start;
> diff --git a/datapath/flow.c b/datapath/flow.c
> index d07337c..bec4ebf 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -629,7 +629,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
>    memset(key, 0, sizeof(*key));
> 
>    key->phy.priority = skb->priority;
> -    key->phy.tun_id = OVS_CB(skb)->tun_id;
> +    if (OVS_CB(skb)->tun_key)
> +        key->tun.tun_key = *OVS_CB(skb)->tun_key;
>    key->phy.in_port = in_port;
> 
>    skb_reset_mac_header(skb);
> @@ -847,6 +848,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
> 
>    /* Not upstream. */
>    [OVS_KEY_ATTR_TUN_ID] = sizeof(__be64),
> +    [OVS_KEY_ATTR_IPV4_TUNNEL] = sizeof(struct ovs_key_ipv4_tunnel),
> };
> 
> static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
> @@ -1022,9 +1024,39 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
>        swkey->phy.in_port = DP_MAX_PORTS;
>    }
> 
> -    if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) {
> -        swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
> +    if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) &&
> +        attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) {
> +        struct ovs_key_ipv4_tunnel *tun_key;
> +        __be64 tun_id;
> +
> +        tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]);
> +
> +        if (!tun_key->ipv4_dst)
> +            return -EINVAL;
> +        if (!(tun_key->tun_flags & FLOW_TNL_F_KEY))
> +            return -EINVAL;
> +
> +        tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
> +        if (tun_id != tun_key->tun_id)
> +            return -EINVAL;
> +
> +        swkey->tun.tun_key = *tun_key;
> +        attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
> +        attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL);
> +    } else if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) {
> +        swkey->tun.tun_key.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
> +        swkey->tun.tun_key.tun_flags |= FLOW_TNL_F_KEY;
> +
>        attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
> +    } else if (attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) {
> +        struct ovs_key_ipv4_tunnel *tun_key;
> +        tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]);
> +
> +        if (!tun_key->ipv4_dst)
> +            return -EINVAL;
> +
> +        swkey->tun.tun_key = *tun_key;
> +        attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL);
>    }
> 
>    /* Data attributes. */
> @@ -1162,14 +1194,16 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
>  * get the metadata, that is, the parts of the flow key that cannot be
>  * extracted from the packet itself.
>  */
> -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
> +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
> +                   struct ovs_key_ipv4_tunnel *tun_key,
>                   const struct nlattr *attr)
> {
>    const struct nlattr *nla;
>    int rem;
> +    __be64 tun_id;
> 
>    *in_port = DP_MAX_PORTS;
> -    *tun_id = 0;
> +    memset(tun_key, 0, sizeof(*tun_key));
>    *priority = 0;
> 
>    nla_for_each_nested(nla, attr, rem) {
> @@ -1185,7 +1219,35 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
>                break;
> 
>            case OVS_KEY_ATTR_TUN_ID:
> -                *tun_id = nla_get_be64(nla);
> +                tun_id = nla_get_be64(nla);
> +
> +                if (tun_key->ipv4_dst) {
> +                    if (!(tun_key->tun_flags & FLOW_TNL_F_KEY))
> +                        return -EINVAL;
> +                    if (tun_key->tun_id != tun_id)
> +                        return -EINVAL;
> +                    break;
> +                }
> +                tun_key->tun_id = tun_id;
> +                tun_key->tun_flags |= FLOW_TNL_F_KEY;
> +
> +                break;
> +
> +            case OVS_KEY_ATTR_IPV4_TUNNEL:
> +                if (tun_key->tun_flags & FLOW_TNL_F_KEY) {
> +                    tun_id = tun_key->tun_id;
> +
> +                    memcpy(tun_key, nla_data(nla), sizeof(*tun_key));
> +                    if (!(tun_key->tun_flags & FLOW_TNL_F_KEY))
> +                        return -EINVAL;
> +
> +                    if (tun_key->tun_id != tun_id)
> +                        return -EINVAL;
> +                } else
> +                    memcpy(tun_key, nla_data(nla), sizeof(*tun_key));
> +
> +                if (!tun_key->ipv4_dst)
> +                    return -EINVAL;
>                break;
> 
>            case OVS_KEY_ATTR_IN_PORT:
> @@ -1210,8 +1272,16 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
>        nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
>        goto nla_put_failure;
> 
> -    if (swkey->phy.tun_id != cpu_to_be64(0) &&
> -        nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id))
> +    if (swkey->tun.tun_key.ipv4_dst) {
> +        struct ovs_key_ipv4_tunnel *tun_key;
> +        nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL,
> +                  sizeof(*tun_key));
> +        if (!nla)
> +            goto nla_put_failure;
> +        tun_key = nla_data(nla);
> +        memcpy(tun_key, &swkey->tun.tun_key, sizeof(*tun_key));
> +    } else if ((swkey->tun.tun_key.tun_flags & FLOW_TNL_F_KEY) &&
> +        nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->tun.tun_key.tun_id))
>        goto nla_put_failure;
> 
>    if (swkey->phy.in_port != DP_MAX_PORTS &&
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 02c563a..c52e029 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -42,11 +42,13 @@ struct sw_flow_actions {
> 
> struct sw_flow_key {
>    struct {
> -        __be64    tun_id;        /* Encapsulating tunnel ID. */
>        u32    priority;    /* Packet QoS priority. */
>        u16    in_port;    /* Input switch port (or DP_MAX_PORTS). */
>    } phy;
>    struct {
> +        struct ovs_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
> +    } tun;
> +    struct {
>        u8     src[ETH_ALEN];    /* Ethernet source address. */
>        u8     dst[ETH_ALEN];    /* Ethernet destination address. */
>        __be16 tci;        /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
> @@ -150,6 +152,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
>  *                         ------  ---  ------  -----
>  *  OVS_KEY_ATTR_PRIORITY      4    --     4      8
>  *  OVS_KEY_ATTR_TUN_ID        8    --     4     12
> + *  OVS_KEY_ATTR_IPV4_TUNNEL  24    --     4     28
>  *  OVS_KEY_ATTR_IN_PORT       4    --     4      8
>  *  OVS_KEY_ATTR_ETHERNET     12    --     4     16
>  *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8  (outer VLAN ethertype)
> @@ -160,14 +163,15 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
>  *  OVS_KEY_ATTR_ICMPV6        2     2     4      8
>  *  OVS_KEY_ATTR_ND           28    --     4     32
>  *  -------------------------------------------------
> - *  total                                       156
> + *  total                                       184
>  */
> -#define FLOW_BUFSIZE 156
> +#define FLOW_BUFSIZE 184
> 
> int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
> int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
>              const struct nlattr *);
> -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
> +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
> +                   struct ovs_key_ipv4_tunnel *tun_key,
>                   const struct nlattr *);
> 
> #define MAX_ACTIONS_BUFSIZE    (16 * 1024)
> diff --git a/datapath/tunnel.c b/datapath/tunnel.c
> index d651c11..72ead8f 100644
> --- a/datapath/tunnel.c
> +++ b/datapath/tunnel.c
> @@ -367,9 +367,9 @@ struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
>    return NULL;
> }
> 
> -static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
> +static void ecn_decapsulate(struct sk_buff *skb)
> {
> -    if (unlikely(INET_ECN_is_ce(tos))) {
> +    if (unlikely(INET_ECN_is_ce(OVS_CB(skb)->tun_key->ipv4_tos))) {
>        __be16 protocol = skb->protocol;
> 
>        skb_set_network_header(skb, ETH_HLEN);
> @@ -416,7 +416,7 @@ static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
>  * - skb->csum does not include the inner Ethernet header.
>  * - The layer pointers are undefined.
>  */
> -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
> +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb)
> {
>    struct ethhdr *eh;
> 
> @@ -433,7 +433,7 @@ void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
>    skb_clear_rxhash(skb);
>    secpath_reset(skb);
> 
> -    ecn_decapsulate(skb, tos);
> +    ecn_decapsulate(skb);
>    vlan_set_tci(skb, 0);
> 
>    if (unlikely(compute_ip_summed(skb, false))) {
> @@ -613,7 +613,7 @@ static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
> 
> bool ovs_tnl_frag_needed(struct vport *vport,
>             const struct tnl_mutable_config *mutable,
> -             struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
> +             struct sk_buff *skb, unsigned int mtu)
> {
>    unsigned int eth_hdr_len = ETH_HLEN;
>    unsigned int total_length = 0, header_length = 0, payload_length;
> @@ -697,17 +697,6 @@ bool ovs_tnl_frag_needed(struct vport *vport,
>        ipv6_build_icmp(skb, nskb, mtu, payload_length);
> #endif
> 
> -    /*
> -     * Assume that flow based keys are symmetric with respect to input
> -     * and output and use the key that we were going to put on the
> -     * outgoing packet for the fake received packet.  If the keys are
> -     * not symmetric then PMTUD needs to be disabled since we won't have
> -     * any way of synthesizing packets.
> -     */
> -    if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) ==
> -        (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
> -        OVS_CB(nskb)->tun_id = flow_key;
> -
>    if (unlikely(compute_ip_summed(nskb, false))) {
>        kfree_skb(nskb);
>        return false;
> @@ -721,14 +710,26 @@ bool ovs_tnl_frag_needed(struct vport *vport,
> static bool check_mtu(struct sk_buff *skb,
>              struct vport *vport,
>              const struct tnl_mutable_config *mutable,
> -              const struct rtable *rt, __be16 *frag_offp)
> +              const struct rtable *rt, __be16 *frag_offp,
> +              int tunnel_hlen)
> {
> -    bool df_inherit = mutable->flags & TNL_F_DF_INHERIT;
> -    bool pmtud = mutable->flags & TNL_F_PMTUD;
> -    __be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
> +    bool df_inherit;
> +    bool pmtud;
> +    __be16 frag_off;
>    int mtu = 0;
>    unsigned int packet_length = skb->len - ETH_HLEN;
> 
> +    if (OVS_CB(skb)->tun_key->ipv4_dst) {
> +        df_inherit = false;
> +        pmtud = false;
> +        frag_off = OVS_CB(skb)->tun_key->tun_flags & FLOW_TNL_F_DONT_FRAGMENT ?
> +                  htons(IP_DF) : 0;
> +    } else {
> +        df_inherit = mutable->flags & TNL_F_DF_INHERIT;
> +        pmtud = mutable->flags & TNL_F_PMTUD;
> +        frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
> +    }
> +
>    /* Allow for one level of tagging in the packet length. */
>    if (!vlan_tx_tag_present(skb) &&
>        eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
> @@ -746,7 +747,7 @@ static bool check_mtu(struct sk_buff *skb,
> 
>        mtu = dst_mtu(&rt_dst(rt))
>            - ETH_HLEN
> -            - mutable->tunnel_hlen
> +            - tunnel_hlen
>            - vlan_header;
>    }
> 
> @@ -760,8 +761,7 @@ static bool check_mtu(struct sk_buff *skb,
>            mtu = max(mtu, IP_MIN_MTU);
> 
>            if (packet_length > mtu &&
> -                ovs_tnl_frag_needed(vport, mutable, skb, mtu,
> -                        OVS_CB(skb)->tun_id))
> +                ovs_tnl_frag_needed(vport, mutable, skb, mtu))
>                return false;
>        }
>    }
> @@ -777,8 +777,7 @@ static bool check_mtu(struct sk_buff *skb,
>            mtu = max(mtu, IPV6_MIN_MTU);
> 
>            if (packet_length > mtu &&
> -                ovs_tnl_frag_needed(vport, mutable, skb, mtu,
> -                        OVS_CB(skb)->tun_id))
> +                ovs_tnl_frag_needed(vport, mutable, skb, mtu))
>                return false;
>        }
>    }
> @@ -790,6 +789,7 @@ static bool check_mtu(struct sk_buff *skb,
> 
> static void create_tunnel_header(const struct vport *vport,
>                 const struct tnl_mutable_config *mutable,
> +                 const struct ovs_key_ipv4_tunnel *tun_key,
>                 const struct rtable *rt, void *header)
> {
>    struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
> @@ -806,7 +806,7 @@ static void create_tunnel_header(const struct vport *vport,
>    if (!iph->ttl)
>        iph->ttl = ip4_dst_hoplimit(&rt_dst(rt));
> 
> -    tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
> +    tnl_vport->tnl_ops->build_header(vport, mutable, tun_key, iph + 1);
> }
> 
> static void *get_cached_header(const struct tnl_cache *cache)
> @@ -907,14 +907,21 @@ static struct tnl_cache *build_cache(struct vport *vport,
>                     struct rtable *rt)
> {
>    struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
> +    static const struct ovs_key_ipv4_tunnel tun_key;
>    struct tnl_cache *cache;
>    void *cache_data;
>    int cache_len;
>    struct hh_cache *hh;
> +    int tunnel_hlen;
> 
>    if (!(mutable->flags & TNL_F_HDR_CACHE))
>        return NULL;
> 
> +    tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, &tun_key) +
> +                sizeof(struct iphdr);
> +    if (tunnel_hlen < 0)
> +        return NULL;
> +
>    /*
>     * If there is no entry in the ARP cache or if this device does not
>     * support hard header caching just fall back to the IP stack.
> @@ -937,7 +944,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
>    else
>        cache = NULL;
> 
> -    cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + mutable->tunnel_hlen;
> +    cache_len = LL_RESERVED_SPACE(rt_dst(rt).dev) + tunnel_hlen;
> 
>    cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) +
>            cache_len, GFP_ATOMIC);
> @@ -946,9 +953,9 @@ static struct tnl_cache *build_cache(struct vport *vport,
> 
>    create_eth_hdr(cache, hh);
>    cache_data = get_cached_header(cache) + cache->hh_len;
> -    cache->len = cache->hh_len + mutable->tunnel_hlen;
> +    cache->len = cache->hh_len + tunnel_hlen;
> 
> -    create_tunnel_header(vport, mutable, rt, cache_data);
> +    create_tunnel_header(vport, mutable, &tun_key, rt, cache_data);
> 
>    cache->mutable_seq = mutable->seq;
>    cache->rt = rt;
> @@ -1000,15 +1007,16 @@ unlock:
> }
> 
> static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
> -                   u8 ipproto, u8 tos)
> +                   __be32 saddr, __be32 daddr, u8 ipproto,
> +                   u8 tos)
> {
>    /* Tunnel configuration keeps DSCP part of TOS bits, But Linux
>     * router expect RT_TOS bits only. */
> 
> #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
>    struct flowi fl = { .nl_u = { .ip4_u = {
> -                    .daddr = mutable->key.daddr,
> -                    .saddr = mutable->key.saddr,
> +                    .daddr = daddr,
> +                    .saddr = saddr,
>                    .tos   = RT_TOS(tos) } },
>                    .proto = ipproto };
>    struct rtable *rt;
> @@ -1018,8 +1026,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
> 
>    return rt;
> #else
> -    struct flowi4 fl = { .daddr = mutable->key.daddr,
> -                 .saddr = mutable->key.saddr,
> +    struct flowi4 fl = { .daddr = daddr,
> +                 .saddr = saddr,
>                 .flowi4_tos = RT_TOS(tos),
>                 .flowi4_proto = ipproto };
> 
> @@ -1029,7 +1037,8 @@ static struct rtable *__find_route(const struct tnl_mutable_config *mutable,
> 
> static struct rtable *find_route(struct vport *vport,
>                 const struct tnl_mutable_config *mutable,
> -                 u8 tos, struct tnl_cache **cache)
> +                 __be32 saddr, __be32 daddr, u8 tos,
> +                 struct tnl_cache **cache)
> {
>    struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
>    struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache);
> @@ -1037,17 +1046,17 @@ static struct rtable *find_route(struct vport *vport,
>    *cache = NULL;
>    tos = RT_TOS(tos);
> 
> -    if (likely(tos == RT_TOS(mutable->tos) &&
> -        check_cache_valid(cur_cache, mutable))) {
> +    if (tos == RT_TOS(mutable->tos) &&
> +        check_cache_valid(cur_cache, mutable)) {
>        *cache = cur_cache;
>        return cur_cache->rt;
>    } else {
>        struct rtable *rt;
> 
> -        rt = __find_route(mutable, tnl_vport->tnl_ops->ipproto, tos);
> +        rt = __find_route(mutable, saddr, daddr,
> +                  tnl_vport->tnl_ops->ipproto, tos);
>        if (IS_ERR(rt))
>            return NULL;
> -
>        if (likely(tos == RT_TOS(mutable->tos)))
>            *cache = build_cache(vport, mutable, rt);
> 
> @@ -1076,13 +1085,14 @@ static bool need_linearize(const struct sk_buff *skb)
> 
> static struct sk_buff *handle_offloads(struct sk_buff *skb,
>                       const struct tnl_mutable_config *mutable,
> -                       const struct rtable *rt)
> +                       const struct rtable *rt,
> +                       int tunnel_hlen)
> {
>    int min_headroom;
>    int err;
> 
>    min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
> -            + mutable->tunnel_hlen
> +            + tunnel_hlen
>            + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
> 
>    if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
> @@ -1137,14 +1147,14 @@ error:
> }
> 
> static int send_frags(struct sk_buff *skb,
> -              const struct tnl_mutable_config *mutable)
> +              int tunnel_hlen)
> {
>    int sent_len;
> 
>    sent_len = 0;
>    while (skb) {
>        struct sk_buff *next = skb->next;
> -        int frag_len = skb->len - mutable->tunnel_hlen;
> +        int frag_len = skb->len - tunnel_hlen;
>        int err;
> 
>        skb->next = NULL;
> @@ -1173,15 +1183,17 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
> {
>    struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
>    const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
> -
>    enum vport_err_type err = VPORT_E_TX_ERROR;
>    struct rtable *rt;
>    struct dst_entry *unattached_dst = NULL;
>    struct tnl_cache *cache;
> +    struct ovs_key_ipv4_tunnel tun_key;
>    int sent_len = 0;
> +    int tunnel_hlen;
>    __be16 frag_off = 0;
> +    __be32 daddr;
> +    __be32 saddr;
>    u8 ttl;
> -    u8 inner_tos;
>    u8 tos;
> 
>    /* Validate the protocol headers before we try to use them. */
> @@ -1207,30 +1219,76 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
>    }
> #endif
> 
> -    /* ToS */
> -    if (skb->protocol == htons(ETH_P_IP))
> -        inner_tos = ip_hdr(skb)->tos;
> +    /* If OVS_CB(skb)->tun_key is NULL, point it at the local tun_key here,
> +     * and zero it out.
> +     */
> +    if (!OVS_CB(skb)->tun_key) {
> +        memset(&tun_key, 0, sizeof(tun_key));
> +        OVS_CB(skb)->tun_key = &tun_key;
> +    }
> +
> +    tunnel_hlen = tnl_vport->tnl_ops->hdr_len(mutable, OVS_CB(skb)->tun_key) +
> +                sizeof(struct iphdr);
> +
> +    if (tunnel_hlen < 0) {
> +        err = VPORT_E_TX_DROPPED;
> +        goto error_free;
> +    }
> +
> +    if (OVS_CB(skb)->tun_key->ipv4_dst) {
> +        daddr = OVS_CB(skb)->tun_key->ipv4_dst;
> +        saddr = OVS_CB(skb)->tun_key->ipv4_src;
> +        tos = OVS_CB(skb)->tun_key->ipv4_tos;
> +        ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
> +    } else {
> +        u8 inner_tos;
> +        daddr = mutable->key.daddr;
> +        saddr = mutable->key.saddr;
> +
> +        if (!daddr) {
> +            /* Trying to send a packet from a Null-port without
> +             * tunnel info? Drop this packet. */
> +            err = VPORT_E_TX_DROPPED;
> +            goto error_free;
> +        }
> +
> +        /* ToS */
> +        if (skb->protocol == htons(ETH_P_IP))
> +            inner_tos = ip_hdr(skb)->tos;
> #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
> -    else if (skb->protocol == htons(ETH_P_IPV6))
> -        inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
> +        else if (skb->protocol == htons(ETH_P_IPV6))
> +            inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
> #endif
> -    else
> -        inner_tos = 0;
> +        else
> +            inner_tos = 0;
> 
> -    if (mutable->flags & TNL_F_TOS_INHERIT)
> -        tos = inner_tos;
> -    else
> -        tos = mutable->tos;
> +        if (mutable->flags & TNL_F_TOS_INHERIT)
> +            tos = inner_tos;
> +        else
> +            tos = mutable->tos;
> +
> +        tos = INET_ECN_encapsulate(tos, inner_tos);
> +
> +        /* TTL */
> +        ttl = mutable->ttl;
> +        if (mutable->flags & TNL_F_TTL_INHERIT) {
> +            if (skb->protocol == htons(ETH_P_IP))
> +                ttl = ip_hdr(skb)->ttl;
> +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
> +            else if (skb->protocol == htons(ETH_P_IPV6))
> +                ttl = ipv6_hdr(skb)->hop_limit;
> +#endif
> +        }
> +
> +    }
> 
>    /* Route lookup */
> -    rt = find_route(vport, mutable, tos, &cache);
> +    rt = find_route(vport, mutable, saddr, daddr, tos, &cache);
>    if (unlikely(!rt))
>        goto error_free;
>    if (unlikely(!cache))
>        unattached_dst = &rt_dst(rt);
> 
> -    tos = INET_ECN_encapsulate(tos, inner_tos);
> -
>    /* Reset SKB */
>    nf_reset(skb);
>    secpath_reset(skb);
> @@ -1238,12 +1296,12 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
>    skb_clear_rxhash(skb);
> 
>    /* Offloading */
> -    skb = handle_offloads(skb, mutable, rt);
> +    skb = handle_offloads(skb, mutable, rt, tunnel_hlen);
>    if (IS_ERR(skb))
>        goto error;
> 
>    /* MTU */
> -    if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) {
> +    if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off, tunnel_hlen))) {
>        err = VPORT_E_TX_DROPPED;
>        goto error_free;
>    }
> @@ -1252,25 +1310,19 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
>     * If we are over the MTU, allow the IP stack to handle fragmentation.
>     * Fragmentation is a slow path anyways.
>     */
> -    if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
> +    if (unlikely(skb->len + tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
>             cache)) {
>        unattached_dst = &rt_dst(rt);
>        dst_hold(unattached_dst);
>        cache = NULL;
>    }
> 
> -    /* TTL */
> -    ttl = mutable->ttl;
> -    if (!ttl)
> -        ttl = ip4_dst_hoplimit(&rt_dst(rt));
> -
> -    if (mutable->flags & TNL_F_TTL_INHERIT) {
> -        if (skb->protocol == htons(ETH_P_IP))
> -            ttl = ip_hdr(skb)->ttl;
> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
> -        else if (skb->protocol == htons(ETH_P_IPV6))
> -            ttl = ipv6_hdr(skb)->hop_limit;
> -#endif
> +    /* TTL Fixup. */
> +    if (!OVS_CB(skb)->tun_key->ipv4_dst) {
> +        if (!(mutable->flags & TNL_F_TTL_INHERIT)) {
> +            if (!ttl)
> +                ttl = ip4_dst_hoplimit(&rt_dst(rt));
> +        }
>    }
> 
>    while (skb) {
> @@ -1288,8 +1340,8 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
>            skb_set_network_header(skb, cache->hh_len);
> 
>        } else {
> -            skb_push(skb, mutable->tunnel_hlen);
> -            create_tunnel_header(vport, mutable, rt, skb->data);
> +            skb_push(skb, tunnel_hlen);
> +            create_tunnel_header(vport, mutable, OVS_CB(skb)->tun_key, rt, skb->data);
>            skb_reset_network_header(skb);
> 
>            if (next_skb)
> @@ -1308,7 +1360,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
>        ip_select_ident(iph, &rt_dst(rt), NULL);
> 
>        skb = tnl_vport->tnl_ops->update_header(vport, mutable,
> -                            &rt_dst(rt), skb);
> +                            &rt_dst(rt), skb, tunnel_hlen);
>        if (unlikely(!skb))
>            goto next;
> 
> @@ -1341,7 +1393,7 @@ int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
>                    sent_len += orig_len;
>            }
>        } else
> -            sent_len += send_frags(skb, mutable);
> +            sent_len += send_frags(skb, tunnel_hlen);
> 
> next:
>        skb = next_skb;
> @@ -1427,12 +1479,6 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
>    else
>        mutable->out_key = nla_get_be64(a[OVS_TUNNEL_ATTR_OUT_KEY]);
> 
> -    mutable->tunnel_hlen = tnl_ops->hdr_len(mutable);
> -    if (mutable->tunnel_hlen < 0)
> -        return mutable->tunnel_hlen;
> -
> -    mutable->tunnel_hlen += sizeof(struct iphdr);
> -
>    old_vport = port_table_lookup(&mutable->key, &old_mutable);
>    if (old_vport && old_vport != cur_vport)
>        return -EEXIST;
> @@ -1442,7 +1488,8 @@ static int tnl_set_config(struct net *net, struct nlattr *options,
>        struct net_device *dev;
>        struct rtable *rt;
> 
> -        rt = __find_route(mutable, tnl_ops->ipproto, mutable->tos);
> +        rt = __find_route(mutable, mutable->key.saddr, mutable->key.daddr,
> +                  tnl_ops->ipproto, mutable->tos);
>        if (IS_ERR(rt))
>            return -EADDRNOTAVAIL;
>        dev = rt_dst(rt).dev;
> diff --git a/datapath/tunnel.h b/datapath/tunnel.h
> index d2a87f2..951a6f1 100644
> --- a/datapath/tunnel.h
> +++ b/datapath/tunnel.h
> @@ -109,8 +109,6 @@ struct tnl_mutable_config {
> 
>    unsigned seq;
> 
> -    unsigned tunnel_hlen;
> -
>    unsigned char eth_addr[ETH_ALEN];
> 
>    /* Configured via OVS_TUNNEL_ATTR_* attributes. */
> @@ -132,7 +130,8 @@ struct tnl_ops {
>     * build_header() (i.e. excludes the IP header).  Returns a negative
>     * error code if the configuration is invalid.
>     */
> -    int (*hdr_len)(const struct tnl_mutable_config *);
> +    int (*hdr_len)(const struct tnl_mutable_config *,
> +               const struct ovs_key_ipv4_tunnel *);
> 
>    /*
>     * Builds the static portion of the tunnel header, which is stored in
> @@ -143,7 +142,8 @@ struct tnl_ops {
>     * called for every packet, so try not to make it too slow.
>     */
>    void (*build_header)(const struct vport *,
> -                 const struct tnl_mutable_config *, void *header);
> +                 const struct tnl_mutable_config *,
> +                 const struct ovs_key_ipv4_tunnel *, void *header);
> 
>    /*
>     * Updates the cached header of a packet to match the actual packet
> @@ -155,7 +155,8 @@ struct tnl_ops {
>     */
>    struct sk_buff *(*update_header)(const struct vport *,
>                     const struct tnl_mutable_config *,
> -                     struct dst_entry *, struct sk_buff *);
> +                     struct dst_entry *, struct sk_buff *,
> +                     int tunnel_hlen);
> };
> 
> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
> @@ -270,14 +271,14 @@ int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr);
> const char *ovs_tnl_get_name(const struct vport *vport);
> const unsigned char *ovs_tnl_get_addr(const struct vport *vport);
> int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
> -void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos);
> +void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb);
> 
> struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
>                __be64 key, int tunnel_type,
>                const struct tnl_mutable_config **mutable);
> bool ovs_tnl_frag_needed(struct vport *vport,
>             const struct tnl_mutable_config *mutable,
> -             struct sk_buff *skb, unsigned int mtu, __be64 flow_key);
> +             struct sk_buff *skb, unsigned int mtu);
> void ovs_tnl_free_linked_skbs(struct sk_buff *skb);
> 
> int ovs_tnl_init(void);
> @@ -287,4 +288,15 @@ static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
>    return vport_priv(vport);
> }
> 
> +static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
> +                    const struct iphdr *iph, __be64 tun_id, u32 tun_flags)
> +{
> +    tun_key->tun_id = tun_id;
> +    tun_key->ipv4_src = iph->saddr;
> +    tun_key->ipv4_dst = iph->daddr;
> +    tun_key->ipv4_tos = iph->tos;
> +    tun_key->ipv4_ttl = iph->ttl;
> +    tun_key->tun_flags = tun_flags;
> +}
> +
> #endif /* tunnel.h */
> diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c
> index 05a099d..f6c34ab 100644
> --- a/datapath/vport-capwap.c
> +++ b/datapath/vport-capwap.c
> @@ -155,16 +155,52 @@ static struct inet_frags frag_state = {
>    .secret_interval = CAPWAP_FRAG_SECRET_INTERVAL,
> };
> 
> -static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
> +static int get_capwap_param(const struct tnl_mutable_config *mutable,
> +            const struct ovs_key_ipv4_tunnel *tun_key,
> +            u32 *flags,  __be64 *out_key)
> +{
> +    if (tun_key->ipv4_dst) {
> +        *flags = 0;
> +
> +        if (tun_key->tun_flags & FLOW_TNL_F_KEY)
> +            *flags = TNL_F_OUT_KEY_ACTION;
> +        if (tun_key->tun_flags & FLOW_TNL_F_CSUM)
> +            *flags |= TNL_F_CSUM;
> +        *out_key = tun_key->tun_id;
> +    } else {
> +        *flags = mutable->flags;
> +        if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
> +            if (likely(tun_key->tun_flags & FLOW_TNL_F_KEY)) {
> +                *out_key = tun_key->tun_id;
> +            } else {
> +                *out_key = 0;
> +                return -EINVAL;
> +            }
> +        } else
> +            *out_key = mutable->out_key;
> +
> +    }
> +    return 0;
> +}
> +
> +static int capwap_hdr_len(const struct tnl_mutable_config *mutable,
> +              const struct ovs_key_ipv4_tunnel *tun_key)
> {
>    int size = CAPWAP_MIN_HLEN;
> +    u32 flags;
> +    __be64 out_key;
> +    int err;
> +
> +    err = get_capwap_param(mutable, tun_key, &flags, &out_key);
> +    if (err)
> +        return err;
> 
>    /* CAPWAP has no checksums. */
> -    if (mutable->flags & TNL_F_CSUM)
> +    if (flags & TNL_F_CSUM)
>        return -EINVAL;
> 
>    /* if keys are specified, then add WSI field */
> -    if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
> +    if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) {
>        size += sizeof(struct capwaphdr_wsi) +
>            sizeof(struct capwaphdr_wsi_key);
>    }
> @@ -174,10 +210,15 @@ static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
> 
> static void capwap_build_header(const struct vport *vport,
>                const struct tnl_mutable_config *mutable,
> +                const struct ovs_key_ipv4_tunnel *tun_key,
>                void *header)
> {
>    struct udphdr *udph = header;
>    struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
> +    u32 flags;
> +    __be64 out_key;
> +
> +    get_capwap_param(mutable, tun_key, &flags, &out_key);
> 
>    udph->source = htons(CAPWAP_SRC_PORT);
>    udph->dest = htons(CAPWAP_DST_PORT);
> @@ -186,7 +227,7 @@ static void capwap_build_header(const struct vport *vport,
>    cwh->frag_id = 0;
>    cwh->frag_off = 0;
> 
> -    if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
> +    if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) {
>        struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
> 
>        cwh->begin = CAPWAP_KEYED;
> @@ -197,9 +238,9 @@ static void capwap_build_header(const struct vport *vport,
>        wsi->flags = CAPWAP_WSI_F_KEY64;
>        wsi->reserved_padding = 0;
> 
> -        if (mutable->out_key) {
> +        if (out_key) {
>            struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
> -            opt->key = mutable->out_key;
> +            opt->key = out_key;
>        }
>    } else {
>        /* make packet readable by old capwap code */
> @@ -210,30 +251,39 @@ static void capwap_build_header(const struct vport *vport,
> static struct sk_buff *capwap_update_header(const struct vport *vport,
>                        const struct tnl_mutable_config *mutable,
>                        struct dst_entry *dst,
> -                        struct sk_buff *skb)
> +                        struct sk_buff *skb,
> +                        int tunnel_hlen)
> {
> +    struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
>    struct udphdr *udph = udp_hdr(skb);
> +    u32 flags;
> +    __be64 out_key;
> +
> +    if (get_capwap_param(mutable, tun_key, &flags, &out_key)) {
> +        kfree_skb(skb);
> +        return NULL;
> +    }
> 
> -    if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
> +    if (flags & TNL_F_OUT_KEY_ACTION) {
>        /* first field in WSI is key */
>        struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
>        struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
>        struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
> 
> -        opt->key = OVS_CB(skb)->tun_id;
> +        opt->key = out_key;
>    }
> 
>    udph->len = htons(skb->len - skb_transport_offset(skb));
> 
>    if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) {
> -        unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable);
> +        unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable, tun_key);
>        skb = fragment(skb, vport, dst, hlen);
>    }
> 
>    return skb;
> }
> 
> -static int process_capwap_wsi(struct sk_buff *skb, __be64 *key)
> +static int process_capwap_wsi(struct sk_buff *skb, __be64 *key, bool *key_preset)
> {
>    struct capwaphdr *cwh = capwap_hdr(skb);
>    struct capwaphdr_wsi *wsi;
> @@ -270,12 +320,15 @@ static int process_capwap_wsi(struct sk_buff *skb, __be64 *key)
> 
>        opt = (struct capwaphdr_wsi_key *)(wsi + 1);
>        *key = opt->key;
> +        *key_preset = true;
> +    } else {
> +        *key_preset = false;
>    }
> 
>    return 0;
> }
> 
> -static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key)
> +static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key, bool *key_preset)
> {
>    struct capwaphdr *cwh = capwap_hdr(skb);
>    int hdr_len = sizeof(struct udphdr);
> @@ -301,7 +354,7 @@ static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key)
>        cwh = capwap_hdr(skb);
>    }
> 
> -    if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key))
> +    if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key, key_preset))
>        goto error;
> 
>    return skb;
> @@ -316,12 +369,14 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
>    struct vport *vport;
>    const struct tnl_mutable_config *mutable;
>    struct iphdr *iph;
> +    struct ovs_key_ipv4_tunnel tun_key;
>    __be64 key = 0;
> +    bool key_preset = false;
> 
>    if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN)))
>        goto error;
> 
> -    skb = process_capwap_proto(skb, &key);
> +    skb = process_capwap_proto(skb, &key, &key_preset);
>    if (unlikely(!skb))
>        goto out;
> 
> @@ -334,11 +389,14 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
>    }
> 
>    if (mutable->flags & TNL_F_IN_KEY_MATCH)
> -        OVS_CB(skb)->tun_id = key;
> +        key_preset = true;
>    else
> -        OVS_CB(skb)->tun_id = 0;
> +        key_preset = false;
> +
> +    tnl_tun_key_init(&tun_key, iph, key, key_preset ? FLOW_TNL_F_KEY : 0);
> +    OVS_CB(skb)->tun_key = &tun_key;
> 
> -    ovs_tnl_rcv(vport, skb, iph->tos);
> +    ovs_tnl_rcv(vport, skb);
>    goto out;
> 
> error:
> diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
> index e3a190f..ec788d5 100644
> --- a/datapath/vport-gre.c
> +++ b/datapath/vport-gre.c
> @@ -45,22 +45,61 @@ struct gre_base_hdr {
>    __be16 protocol;
> };
> 
> -static int gre_hdr_len(const struct tnl_mutable_config *mutable)
> +static int get_gre_param(const struct tnl_mutable_config *mutable,
> +            const struct ovs_key_ipv4_tunnel *tun_key,
> +            u32 *flags, u32 *tunnel_type, __be64 *out_key)
> +{
> +    if (tun_key->ipv4_dst) {
> +        *flags = 0;
> +
> +        if (tun_key->tun_flags & FLOW_TNL_F_KEY)
> +            *flags = TNL_F_OUT_KEY_ACTION;
> +        if (tun_key->tun_flags & FLOW_TNL_F_CSUM)
> +            *flags |= TNL_F_CSUM;
> +        *tunnel_type = TNL_T_PROTO_GRE;
> +        *out_key = tun_key->tun_id;
> +    } else {
> +        *flags = mutable->flags;
> +        *tunnel_type = mutable->key.tunnel_type;
> +        if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
> +            if (likely(tun_key->tun_flags & FLOW_TNL_F_KEY)) {
> +                *out_key = tun_key->tun_id;
> +            } else {
> +                *out_key = 0;
> +                return -EINVAL;
> +            }
> +        } else
> +            *out_key = mutable->out_key;
> +
> +    }
> +    return 0;
> +}
> +
> +static int gre_hdr_len(const struct tnl_mutable_config *mutable,
> +               const struct ovs_key_ipv4_tunnel *tun_key)
> {
>    int len;
> +    u32 flags;
> +    u32 tunnel_type;
> +    __be64 out_key;
> +    int err;
> +
> +    err = get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key);
> +    if (err)
> +        return err;
> 
>    len = GRE_HEADER_SECTION;
> 
> -    if (mutable->flags & TNL_F_CSUM)
> +    if (flags & TNL_F_CSUM)
>        len += GRE_HEADER_SECTION;
> 
>    /* Set key for GRE64 tunnels, even when key if is zero. */
> -    if (mutable->out_key ||
> -        mutable->key.tunnel_type & TNL_T_PROTO_GRE64 ||
> -        mutable->flags & TNL_F_OUT_KEY_ACTION) {
> +    if (out_key ||
> +        tunnel_type & TNL_T_PROTO_GRE64 ||
> +        flags & TNL_F_OUT_KEY_ACTION) {
> 
>        len += GRE_HEADER_SECTION;
> -        if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
> +        if (tunnel_type & TNL_T_PROTO_GRE64)
>            len += GRE_HEADER_SECTION;
>    }
>    return len;
> @@ -88,32 +127,38 @@ static __be32 be64_get_high32(__be64 x)
> 
> static void gre_build_header(const struct vport *vport,
>                 const struct tnl_mutable_config *mutable,
> +                 const struct ovs_key_ipv4_tunnel *tun_key,
>                 void *header)
> {
>    struct gre_base_hdr *greh = header;
>    __be32 *options = (__be32 *)(greh + 1);
> +    u32 flags;
> +    u32 tunnel_type;
> +    __be64 out_key;
> +
> +    get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key);
> 
>    greh->protocol = htons(ETH_P_TEB);
>    greh->flags = 0;
> 
> -    if (mutable->flags & TNL_F_CSUM) {
> +    if (flags & TNL_F_CSUM) {
>        greh->flags |= GRE_CSUM;
>        *options = 0;
>        options++;
>    }
> 
> -    if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
> +    if (flags & TNL_F_OUT_KEY_ACTION) {
>        greh->flags |= GRE_KEY;
> -        if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
> +        if (tunnel_type & TNL_T_PROTO_GRE64)
>            greh->flags |= GRE_SEQ;
> 
> -    } else if (mutable->out_key ||
> -           mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
> +    } else if (out_key ||
> +           tunnel_type & TNL_T_PROTO_GRE64) {
>        greh->flags |= GRE_KEY;
> -        *options = be64_get_low32(mutable->out_key);
> -        if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
> +        *options = be64_get_low32(out_key);
> +        if (tunnel_type & TNL_T_PROTO_GRE64) {
>            options++;
> -            *options = be64_get_high32(mutable->out_key);
> +            *options = be64_get_high32(out_key);
>            greh->flags |= GRE_SEQ;
>        }
>    }
> @@ -122,28 +167,38 @@ static void gre_build_header(const struct vport *vport,
> static struct sk_buff *gre_update_header(const struct vport *vport,
>                     const struct tnl_mutable_config *mutable,
>                     struct dst_entry *dst,
> -                     struct sk_buff *skb)
> +                     struct sk_buff *skb,
> +                     int tunnel_hlen)
> {
> -    __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
> +    u32 flags;
> +    u32 tunnel_type;
> +    __be64 out_key;
> +    struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
> +    __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
>                           - GRE_HEADER_SECTION);
> 
> +    if (get_gre_param(mutable, tun_key, &flags, &tunnel_type, &out_key)) {
> +        kfree_skb(skb);
> +        return NULL;
> +    }
> +
>    /* Work backwards over the options so the checksum is last. */
> -    if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
> -        if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
> +    if (flags & TNL_F_OUT_KEY_ACTION) {
> +        if (tunnel_type & TNL_T_PROTO_GRE64) {
>            /* Set higher 32 bits to seq. */
> -            *options = be64_get_high32(OVS_CB(skb)->tun_id);
> +            *options = be64_get_high32(out_key);
>            options--;
>        }
> -        *options = be64_get_low32(OVS_CB(skb)->tun_id);
> +        *options = be64_get_low32(out_key);
>        options--;
> -    } else if (mutable->out_key ||
> -           mutable->key.tunnel_type & TNL_T_PROTO_GRE64) {
> +    } else if (out_key ||
> +           tunnel_type & TNL_T_PROTO_GRE64) {
>        options--;
> -        if (mutable->key.tunnel_type & TNL_T_PROTO_GRE64)
> +        if (tunnel_type & TNL_T_PROTO_GRE64)
>            options--;
>    }
> 
> -    if (mutable->flags & TNL_F_CSUM)
> +    if (flags & TNL_F_CSUM)
>        *(__sum16 *)options = csum_fold(skb_checksum(skb,
>                        skb_transport_offset(skb),
>                        skb->len - skb_transport_offset(skb),
> @@ -335,7 +390,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
> #endif
> 
>    __skb_pull(skb, tunnel_hdr_len);
> -    ovs_tnl_frag_needed(vport, mutable, skb, mtu, key);
> +    ovs_tnl_frag_needed(vport, mutable, skb, mtu);
>    __skb_push(skb, tunnel_hdr_len);
> 
> out:
> @@ -370,6 +425,20 @@ static bool check_checksum(struct sk_buff *skb)
>    return (csum == 0);
> }
> 
> +static u32 gre_flags_to_tunnel_flags(const struct tnl_mutable_config *mutable,
> +                     __be16 gre_flags)
> +{
> +    u32 tunnel_flags = 0;
> +
> +    if ((mutable->flags & TNL_F_IN_KEY_MATCH) && (gre_flags & GRE_KEY))
> +        tunnel_flags = FLOW_TNL_F_KEY;
> +
> +    if (gre_flags & GRE_CSUM)
> +        tunnel_flags |= FLOW_TNL_F_CSUM;
> +
> +    return tunnel_flags;
> +}
> +
> /* Called with rcu_read_lock and BH disabled. */
> static int gre_rcv(struct sk_buff *skb)
> {
> @@ -377,6 +446,7 @@ static int gre_rcv(struct sk_buff *skb)
>    const struct tnl_mutable_config *mutable;
>    int hdr_len;
>    struct iphdr *iph;
> +    struct ovs_key_ipv4_tunnel tun_key;
>    __be16 flags;
>    __be64 key;
>    u32 tunnel_type;
> @@ -401,15 +471,13 @@ static int gre_rcv(struct sk_buff *skb)
>        goto error;
>    }
> 
> -    if (mutable->flags & TNL_F_IN_KEY_MATCH)
> -        OVS_CB(skb)->tun_id = key;
> -    else
> -        OVS_CB(skb)->tun_id = 0;
> +    tnl_tun_key_init(&tun_key, iph, key, gre_flags_to_tunnel_flags(mutable, flags));
> +    OVS_CB(skb)->tun_key = &tun_key;
> 
>    __skb_pull(skb, hdr_len);
>    skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
> 
> -    ovs_tnl_rcv(vport, skb, iph->tos);
> +    ovs_tnl_rcv(vport, skb);
>    return 0;
> 
> error:
> diff --git a/datapath/vport.c b/datapath/vport.c
> index af1c066..d9c8cfd 100644
> --- a/datapath/vport.c
> +++ b/datapath/vport.c
> @@ -463,7 +463,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
>        OVS_CB(skb)->flow = NULL;
> 
>    if (!(vport->ops->flags & VPORT_F_TUN_ID))
> -        OVS_CB(skb)->tun_id = 0;
> +        OVS_CB(skb)->tun_key = NULL;
> 
>    ovs_dp_process_received_packet(vport, skb);
> }
> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index 294f6d0..2c98490 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> @@ -279,7 +279,8 @@ enum ovs_key_attr {
>    OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
>    OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
>    OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
> -    OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
> +    OVS_KEY_ATTR_IPV4_TUNNEL = 62,  /* struct ovs_key_ipv4_tunnel */
> +    OVS_KEY_ATTR_TUN_ID = 63,  /* be64 tunnel ID */
>    __OVS_KEY_ATTR_MAX
> };
> 
> @@ -361,6 +362,21 @@ struct ovs_key_nd {
>    __u8  nd_tll[6];
> };
> 
> +/* Values for ovs_key_ipv4_tunnel->tun_flags */
> +#define FLOW_TNL_F_DONT_FRAGMENT (1 << 0)
> +#define FLOW_TNL_F_CSUM (1 << 1)
> +#define FLOW_TNL_F_KEY (1 << 2)
> +
> +struct ovs_key_ipv4_tunnel {
> +    __be64 tun_id;
> +    __u32  tun_flags;
> +    __be32 ipv4_src;
> +    __be32 ipv4_dst;
> +    __u8   ipv4_tos;
> +    __u8   ipv4_ttl;
> +    __u8   pad[2];
> +};
> +
> /**
>  * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
>  * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c9e3210..797cb06 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1179,6 +1179,7 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
>     case OVS_KEY_ATTR_TUN_ID:
>     case OVS_KEY_ATTR_PRIORITY:
>     case OVS_KEY_ATTR_IPV6:
> +    case OVS_KEY_ATTR_IPV4_TUNNEL:
>         /* not implemented */
>         break;
> 
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index 257d7a7..9ed17ed 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -93,6 +93,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr)
>     case OVS_KEY_ATTR_UNSPEC: return "unspec";
>     case OVS_KEY_ATTR_ENCAP: return "encap";
>     case OVS_KEY_ATTR_PRIORITY: return "priority";
> +    case OVS_KEY_ATTR_TUN_ID: return "tun_id";
> +    case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel";
>     case OVS_KEY_ATTR_IN_PORT: return "in_port";
>     case OVS_KEY_ATTR_ETHERNET: return "eth";
>     case OVS_KEY_ATTR_VLAN: return "vlan";
> @@ -105,7 +107,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr)
>     case OVS_KEY_ATTR_ICMPV6: return "icmpv6";
>     case OVS_KEY_ATTR_ARP: return "arp";
>     case OVS_KEY_ATTR_ND: return "nd";
> -    case OVS_KEY_ATTR_TUN_ID: return "tun_id";
> 
>     case __OVS_KEY_ATTR_MAX:
>     default:
> @@ -602,6 +603,7 @@ odp_flow_key_attr_len(uint16_t type)
>     case OVS_KEY_ATTR_ENCAP: return -2;
>     case OVS_KEY_ATTR_PRIORITY: return 4;
>     case OVS_KEY_ATTR_TUN_ID: return 8;
> +    case OVS_KEY_ATTR_IPV4_TUNNEL: return sizeof(struct ovs_key_ipv4_tunnel);
>     case OVS_KEY_ATTR_IN_PORT: return 4;
>     case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet);
>     case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16);
> @@ -668,6 +670,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
>     const struct ovs_key_icmpv6 *icmpv6_key;
>     const struct ovs_key_arp *arp_key;
>     const struct ovs_key_nd *nd_key;
> +    const struct ovs_key_ipv4_tunnel *ipv4_tun_key;
>     enum ovs_key_attr attr = nl_attr_type(a);
>     int expected_len;
> 
> @@ -698,6 +701,16 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds)
>         ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a)));
>         break;
> 
> +    case OVS_KEY_ATTR_IPV4_TUNNEL:
> +        ipv4_tun_key = nl_attr_get(a);
> +        ds_put_format(ds, "(tun_id=0x%"PRIx64",flags=0x%"PRIx32
> +                      ",src="IP_FMT",dst="IP_FMT",tos=0x%"PRIx8",ttl=%"PRIu8")",
> +                      ntohll(ipv4_tun_key->tun_id), ipv4_tun_key->tun_flags,
> +                      IP_ARGS(&ipv4_tun_key->ipv4_src),
> +                      IP_ARGS(&ipv4_tun_key->ipv4_dst),
> +                      ipv4_tun_key->ipv4_tos, ipv4_tun_key->ipv4_ttl);
> +        break;
> +
>     case OVS_KEY_ATTR_IN_PORT:
>         ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a));
>         break;
> diff --git a/lib/odp-util.h b/lib/odp-util.h
> index 16f2b15..57073ba 100644
> --- a/lib/odp-util.h
> +++ b/lib/odp-util.h
> @@ -80,6 +80,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names,
>  *                         ------  ---  ------  -----
>  *  OVS_KEY_ATTR_PRIORITY      4    --     4      8
>  *  OVS_KEY_ATTR_TUN_ID        8    --     4     12
> + *  OVS_KEY_ATTR_IPV4_TUNNEL  24    --     4     28
>  *  OVS_KEY_ATTR_IN_PORT       4    --     4      8
>  *  OVS_KEY_ATTR_ETHERNET     12    --     4     16
>  *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8  (outer VLAN ethertype)
> @@ -90,7 +91,7 @@ int odp_actions_from_string(const char *, const struct simap *port_names,
>  *  OVS_KEY_ATTR_ICMPV6        2     2     4      8
>  *  OVS_KEY_ATTR_ND           28    --     4     32
>  *  -------------------------------------------------
> - *  total                                       156
> + *  total                                       184
>  *
>  * We include some slack space in case the calculation isn't quite right or we
>  * add another field and forget to adjust this value.
> -- 
> 1.7.10
> 



More information about the dev mailing list