[ovs-dev] [PATCH v3] gre: Restructure tunneling.
Rajahalme, Jarno (NSN - FI/Espoo)
jarno.rajahalme at nsn.com
Fri May 24 06:12:29 UTC 2013
Pravin,
Please find some review comments below,
Jarno
On May 23, 2013, at 23:01 , ext Pravin B Shelar wrote:
...
> diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
> new file mode 100644
> index 0000000..c352ff8
> --- /dev/null
> +++ b/datapath/linux/compat/gre.c
> @@ -0,0 +1,352 @@
> +/*
> + * Copyright (c) 2007-2013 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> + * 02110-1301, USA
> + */
> +
> +#include <linux/module.h>
> +#include <linux/if.h>
> +#include <linux/if_tunnel.h>
> +#include <linux/icmp.h>
> +#include <linux/in.h>
> +#include <linux/ip.h>
> +#include <linux/kernel.h>
> +#include <linux/kmod.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/spinlock.h>
> +
> +#include <net/gre.h>
> +#include <net/icmp.h>
> +#include <net/protocol.h>
> +#include <net/route.h>
> +#include <net/xfrm.h>
> +
> +#include "gso.h"
> +
> +static struct gre_cisco_protocol __rcu *gre_cisco_proto;
> +
> +static void gre_csum_fix(struct sk_buff *skb)
> +{
> + struct gre_base_hdr *greh;
> + __be32 *options;
> + int gre_offset = skb_transport_offset(skb);
> +
> + greh = (struct gre_base_hdr *)skb_transport_header(skb);
> + options = ((__be32 *)greh + 1);
> +
> + *options = 0;
> + *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
> + skb->len - gre_offset, 0));
> +}
> +
> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
> +{
> + int err;
> +
> + skb_reset_inner_headers(skb);
> +
> + if (skb_is_gso(skb)) {
> + if (gre_csum)
> + OVS_GSO_CB(skb)->fix_segment = gre_csum_fix;
> + } else {
> + if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
> + err = skb_checksum_help(skb);
> + if (err)
> + goto error;
> +
> + } else if (skb->ip_summed != CHECKSUM_PARTIAL)
> + skb->ip_summed = CHECKSUM_NONE;
> + }
> + return skb;
> +error:
> + kfree_skb(skb);
> + return ERR_PTR(err);
> +}
> +
> +static bool is_gre_gso(struct sk_buff *skb)
> +{
> + return skb_is_gso(skb);
> +}
What is the value of this? You already use skb_is_gso() above.
> +
> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
> + int hdr_len)
> +{
> + struct gre_base_hdr *greh;
> +
> + __skb_push(skb, hdr_len);
> +
> + greh = (struct gre_base_hdr *)skb->data;
> + greh->flags = tnl_flags_to_gre_flags(tpi->flags);
> + greh->protocol = tpi->proto;
> +
> + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
Add some spaces? Also below.
> + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
> +
> + if (tpi->flags&TUNNEL_SEQ) {
> + *ptr = tpi->seq;
> + ptr--;
> + }
> + if (tpi->flags&TUNNEL_KEY) {
> + *ptr = tpi->key;
> + ptr--;
> + }
> + if (tpi->flags&TUNNEL_CSUM && !is_gre_gso(skb)) {
> + *ptr = 0;
> + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
> + skb->len, 0));
> + }
> + }
> +}
> +
> +static __sum16 check_checksum(struct sk_buff *skb)
> +{
> + __sum16 csum = 0;
> +
> + switch (skb->ip_summed) {
> + case CHECKSUM_COMPLETE:
> + csum = csum_fold(skb->csum);
> +
> + if (!csum)
> + break;
> + /* Fall through. */
> +
> + case CHECKSUM_NONE:
> + skb->csum = 0;
> + csum = __skb_checksum_complete(skb);
> + skb->ip_summed = CHECKSUM_COMPLETE;
> + break;
> + }
> +
> + return csum;
> +}
> +
> +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
> + bool *csum_err)
It seems the csum_err returned via the parameter pointer is ignored by the caller, hence it could be removed.
> +{
> + unsigned int ip_hlen = ip_hdrlen(skb);
> + struct gre_base_hdr *greh;
> + __be32 *options;
> + int hdr_len;
> +
> + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
> + return -EINVAL;
> +
> + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
> + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
> + return -EINVAL;
> +
> + tpi->flags = gre_flags_to_tnl_flags(greh->flags);
> + hdr_len = ip_gre_calc_hlen(tpi->flags);
> +
> + if (!pskb_may_pull(skb, hdr_len))
> + return -EINVAL;
> +
> + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
> + tpi->proto = greh->protocol;
> +
> + options = (__be32 *)(greh + 1);
> + if (greh->flags & GRE_CSUM) {
> + if (check_checksum(skb)) {
> + *csum_err = true;
> + return -EINVAL;
> + }
> + options++;
> + }
> +
> + if (greh->flags & GRE_KEY) {
> + tpi->key = *options;
> + options++;
> + } else
> + tpi->key = 0;
> +
> + if (unlikely(greh->flags & GRE_SEQ)) {
> + tpi->seq = *options;
> + options++;
> + } else
> + tpi->seq = 0;
> +
> + /* WCCP version 1 and 2 protocol decoding.
> + * - Change protocol to IP
> + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
> + */
> + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
> + tpi->proto = htons(ETH_P_IP);
> + if ((*(u8 *)options & 0xF0) != 0x40) {
> + hdr_len += 4;
> + if (!pskb_may_pull(skb, hdr_len))
> + return -EINVAL;
> + }
> + }
> +
> + return iptunnel_pull_header(skb, hdr_len, tpi->proto);
> +}
> +
> +static int gre_cisco_rcv(struct sk_buff *skb)
> +{
> + struct tnl_ptk_info tpi;
> + bool csum_err = false;
> + struct gre_cisco_protocol *proto;
> +
> + rcu_read_lock();
> + proto = rcu_dereference(gre_cisco_proto);
> + if (!proto)
> + goto drop;
> +
> + if (parse_gre_header(skb, &tpi, &csum_err) < 0)
> + goto drop;
> + proto->handler(skb, &tpi);
> + rcu_read_unlock();
> + return 0;
> +
> +drop:
> + rcu_read_unlock();
> + kfree_skb(skb);
> + return 0;
> +}
> +
> +static const struct gre_protocol ipgre_protocol = {
> + .handler = gre_cisco_rcv,
> +};
> +
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
> +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
> +
> +int gre_add_protocol(const struct gre_protocol *proto, u8 version)
> +{
> + if (version >= GREPROTO_MAX)
> + return -EINVAL;
> +
> + return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
> + 0 : -EBUSY;
> +}
> +
> +int gre_del_protocol(const struct gre_protocol *proto, u8 version)
> +{
> + int ret;
> +
> + if (version >= GREPROTO_MAX)
> + return -EINVAL;
> +
> + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
> + 0 : -EBUSY;
> +
> + if (ret)
> + return ret;
> +
> + synchronize_net();
> + return 0;
> +}
> +
> +static int gre_rcv(struct sk_buff *skb)
> +{
> + const struct gre_protocol *proto;
> + u8 ver;
> + int ret;
> +
> + if (!pskb_may_pull(skb, 12))
> + goto drop;
Is there a symbol you could use here instead the literal 12?
> +
> + ver = skb->data[1]&0x7f;
Spaces...
> + if (ver >= GREPROTO_MAX)
> + goto drop;
> +
> + rcu_read_lock();
> + proto = rcu_dereference(gre_proto[ver]);
> + if (!proto || !proto->handler)
> + goto drop_unlock;
> + ret = proto->handler(skb);
> + rcu_read_unlock();
> + return ret;
> +
> +drop_unlock:
> + rcu_read_unlock();
> +drop:
> + kfree_skb(skb);
> + return NET_RX_DROP;
> +}
> +
...
> diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
> new file mode 100644
> index 0000000..f6a4ad3
> --- /dev/null
> +++ b/datapath/linux/compat/gso.c
> @@ -0,0 +1,166 @@
> +/*
> + * Copyright (c) 2007-2013 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> + * 02110-1301, USA
> + */
> +
> +#include <linux/module.h>
> +#include <linux/if.h>
> +#include <linux/if_tunnel.h>
> +#include <linux/icmp.h>
> +#include <linux/in.h>
> +#include <linux/ip.h>
> +#include <linux/kernel.h>
> +#include <linux/kmod.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/spinlock.h>
> +
> +#include <net/gre.h>
> +#include <net/icmp.h>
> +#include <net/protocol.h>
> +#include <net/route.h>
> +#include <net/xfrm.h>
> +
> +#include "gso.h"
> +
> +static __be16 skb_network_protocol(struct sk_buff *skb)
> +{
> + __be16 type = skb->protocol;
> + int vlan_depth = ETH_HLEN;
Shouldn't this be 2*ETH_ALEN instead, since that is where VLAN header starts at if there is one?
> +
> + while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
> + struct vlan_hdr *vh;
> +
> + if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
> + return 0;
> +
> + vh = (struct vlan_hdr *)(skb->data + vlan_depth);
> + type = vh->h_vlan_encapsulated_proto;
> + vlan_depth += VLAN_HLEN;
> + }
> +
> + return type;
> +}
> +
> +static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
> + netdev_features_t features,
> + bool tx_path)
> +{
> + struct iphdr *iph = ip_hdr(skb);
> + int tnl_hlen = skb_inner_network_offset(skb);
> + struct sk_buff *skb1 = skb;
> + struct sk_buff *segs;
> +
> + __skb_pull(skb, tnl_hlen);
> + skb_reset_mac_header(skb);
> + skb_reset_network_header(skb);
> + skb_reset_transport_header(skb);
> + skb->protocol = skb_network_protocol(skb);
> +
> + segs = __skb_gso_segment(skb, 0, true);
Are you sure you want to ignore the tx_path and insist on 'true' here? Using 'false' selects a looser check of skb->ip_summed at __skb_gso_segment. We have seen the check implied by 'true' having the kernel fill the disk with non-rate-limited warnings, so this may be a significant choice you are making here.
> + if (!segs || IS_ERR(segs))
> + goto free;
> +
> + skb = segs;
> + while (skb) {
> + __skb_push(skb, tnl_hlen);
> + skb_reset_mac_header(skb);
> + skb_reset_network_header(skb);
> + skb_set_transport_header(skb, sizeof(struct iphdr));
> + skb->mac_len = 0;
> +
> + memcpy(ip_hdr(skb), iph, tnl_hlen);
Could you educate me a bit here and explain shortly why it is necessary to overwrite the IP headers of the segmented skbs?
> + if (OVS_GSO_CB(skb)->fix_segment)
> + OVS_GSO_CB(skb)->fix_segment(skb);
> + skb = skb->next;
> + }
> +free:
> + consume_skb(skb1);
> + return segs;
> +}
> +
> +static bool need_linearize(const struct sk_buff *skb)
> +{
> + int i;
> +
> + if (unlikely(skb_shinfo(skb)->frag_list))
> + return true;
> +
> + /*
> + * Generally speaking we should linearize if there are paged frags.
> + * However, if all of the refcounts are 1 we know nobody else can
> + * change them from underneath us and we can skip the linearization.
> + */
> + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> + if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1))
> + return true;
> +
> + return false;
> +}
> +
> +int ip_local_out(struct sk_buff *skb)
IMO it would be less surprising if you used 'rpl_ip_local_out' here. Don't know if that would go against a convention though.
> +{
> + struct dst_entry *dst = skb_dst(skb);
> + int ret = NETDEV_TX_OK;
> +
> + if (skb_is_gso(skb)) {
> + skb = tnl_skb_gso_segment(skb, 0, true);
Maybe should use 'false' as the tx_path argument (Jesse?).
> + if (!skb || IS_ERR(skb))
> + return 0;
> + } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
> + int err;
> +
> + if (unlikely(need_linearize(skb))) {
> + err = __skb_linearize(skb);
> + if (unlikely(err))
> + return 0;
> + }
> +
> + err = skb_checksum_help(skb);
> + if (unlikely(err))
> + return 0;
> + }
> +
> + while (skb) {
> + struct sk_buff *next_skb = skb->next;
> + int err;
> +
> + if (next_skb)
> + skb_dst_set(skb, dst_clone(dst));
> + else
> + skb_dst_set(skb, dst);
> +
> + skb->next = NULL;
> +
> + /*
> + * Allow our local IP stack to fragment the outer packet even
> + * if the DF bit is set as a last resort. We also need to
> + * force selection of an IP ID here because Linux will
> + * otherwise leave it at 0 if the packet originally had DF set.
> + */
This comment needs editing as the IP ID is not set here.
> +
> + skb->local_df = 1;
> + memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
> +
Is clearing of the skb control buffer necessary here?
> +#undef ip_local_out
> + err = ip_local_out(skb);
> + if (unlikely(net_xmit_eval(err)))
> + ret = err;
> +
> + skb = next_skb;
> + }
> + return ret;
> +}
...
> diff --git a/datapath/linux/compat/include/net/gre.h b/datapath/linux/compat/include/net/gre.h
> new file mode 100644
> index 0000000..16dc54e
> --- /dev/null
> +++ b/datapath/linux/compat/include/net/gre.h
> @@ -0,0 +1,109 @@
> +#ifndef __LINUX_GRE_WRAPPER_H
> +#define __LINUX_GRE_WRAPPER_H
> +
> +#include <linux/skbuff.h>
> +#include <net/ip_tunnels.h>
> +
> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,37)
> +#include_next <net/gre.h>
> +
> +#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37) */
> +
> +#define GREPROTO_CISCO 0
> +#define GREPROTO_PPTP 1
> +#define GREPROTO_MAX 2
> +#define GRE_IP_PROTO_MAX 2
> +
> +struct gre_protocol {
> + int (*handler)(struct sk_buff *skb);
> + void (*err_handler)(struct sk_buff *skb, u32 info);
> +};
> +
> +int gre_add_protocol(const struct gre_protocol *proto, u8 version);
> +int gre_del_protocol(const struct gre_protocol *proto, u8 version);
> +
> +#endif
> +
> +#define GRE_IP_PROTO_MAX 2
> +
> +struct gre_base_hdr {
> + __be16 flags;
> + __be16 protocol;
> +};
> +#define GRE_HEADER_SECTION 4
> +
> +#define MAX_GRE_PROTO_PRIORITY 255
> +struct gre_cisco_protocol {
> + int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
> + int (*err_handler)(struct sk_buff *skb, u32 info,
> + const struct tnl_ptk_info *tpi);
> + u8 priority;
> +};
> +
> +#define gre_build_header rpl_gre_build_header
> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
> + int hdr_len);
> +
> +#define gre_handle_offloads rpl_gre_handle_offloads
> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum);
> +
> +int gre_cisco_register(struct gre_cisco_protocol *proto);
> +int gre_cisco_unregister(struct gre_cisco_protocol *proto);
> +
> +static inline int ip_gre_calc_hlen(__be16 o_flags)
> +{
> + int addend = 4;
> +
> + if (o_flags&TUNNEL_CSUM)
Spaces...
> + addend += 4;
> + if (o_flags&TUNNEL_KEY)
> + addend += 4;
> + if (o_flags&TUNNEL_SEQ)
> + addend += 4;
> + return addend;
> +}
> +
...
> diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
> new file mode 100644
> index 0000000..ad17c9d
> --- /dev/null
> +++ b/datapath/linux/compat/include/net/ip_tunnels.h
> @@ -0,0 +1,54 @@
> +#ifndef __NET_IP_TUNNELS_WRAPPER_H
> +#define __NET_IP_TUNNELS_WRAPPER_H 1
> +
> +#include <linux/if_tunnel.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/types.h>
> +#include <net/dsfield.h>
> +#include <net/flow.h>
> +#include <net/inet_ecn.h>
> +#include <net/ip.h>
> +#include <net/rtnetlink.h>
> +
> +#define TUNNEL_CSUM __cpu_to_be16(0x01)
> +#define TUNNEL_ROUTING __cpu_to_be16(0x02)
> +#define TUNNEL_KEY __cpu_to_be16(0x04)
> +#define TUNNEL_SEQ __cpu_to_be16(0x08)
> +#define TUNNEL_STRICT __cpu_to_be16(0x10)
> +#define TUNNEL_REC __cpu_to_be16(0x20)
> +#define TUNNEL_VERSION __cpu_to_be16(0x40)
> +#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
> +#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
> +
> +struct tnl_ptk_info {
> + __be16 flags;
> + __be16 proto;
> + __be32 key;
> + __be32 seq;
> +};
> +
> +#define PACKET_RCVD 0
> +#define PACKET_REJECT 1
> +
> +static inline void tunnel_ip_select_ident(struct sk_buff *skb,
> + const struct iphdr *old_iph,
> + struct dst_entry *dst)
> +{
> + struct iphdr *iph = ip_hdr(skb);
> +
> + /* Use inner packet iph-id if possible. */
> + if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
> + iph->id = old_iph->id;
Was it concluded that it is safe to use the inner packet ip id? I get that this is a copy of upstream code, but would like to know.
> + else
> + __ip_select_ident(iph, dst,
> + (skb_shinfo(skb)->gso_segs ?: 1) - 1);
> +}
> +
> +int iptunnel_xmit(struct net *net, struct rtable *rt,
> + struct sk_buff *skb,
> + __be32 src, __be32 dst, __u8 proto,
> + __u8 tos, __u8 ttl, __be16 df);
> +
> +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
> +#endif /* __NET_IP_TUNNELS_H */
> diff --git a/datapath/linux/compat/ip_tunnels_core.c b/datapath/linux/compat/ip_tunnels_core.c
> new file mode 100644
> index 0000000..f8c8488
> --- /dev/null
> +++ b/datapath/linux/compat/ip_tunnels_core.c
> @@ -0,0 +1,115 @@
> +/*
> + * Copyright (c) 2007-2013 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> + * 02110-1301, USA
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/in.h>
> +#include <linux/in_route.h>
> +#include <linux/inetdevice.h>
> +#include <linux/jhash.h>
> +#include <linux/list.h>
> +#include <linux/kernel.h>
> +#include <linux/version.h>
> +#include <linux/workqueue.h>
> +#include <linux/rculist.h>
> +#include <net/ip_tunnels.h>
> +#include <net/route.h>
> +#include <net/xfrm.h>
> +
> +#include "gso.h"
> +#include "compat.h"
> +
> +int iptunnel_xmit(struct net *net, struct rtable *rt,
> + struct sk_buff *skb,
> + __be32 src, __be32 dst, __u8 proto,
> + __u8 tos, __u8 ttl, __be16 df)
> +{
> + int pkt_len = skb->len;
> + struct iphdr *iph;
> + int err;
> +
> + nf_reset(skb);
> + secpath_reset(skb);
> + skb_clear_rxhash(skb);
> + skb_dst_drop(skb);
> + skb_dst_set(skb, &rt_dst(rt));
> +#if 0
> + /* Do not clear ovs-callback. It will be done in gso code. */
> + memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
> +#endif
Why does the skb control buffer need to be cleared in the first place?
> +
> + /* Push down and install the IP header. */
> + __skb_push(skb, sizeof(struct iphdr));
> + skb_reset_network_header(skb);
> +
> + iph = ip_hdr(skb);
> +
> + iph->version = 4;
> + iph->ihl = sizeof(struct iphdr) >> 2;
> + iph->frag_off = df;
> + iph->protocol = proto;
> + iph->tos = tos;
> + iph->daddr = dst;
> + iph->saddr = src;
> + iph->ttl = ttl;
> + tunnel_ip_select_ident(skb,
> + (const struct iphdr *)skb_inner_network_header(skb),
> + &rt_dst(rt));
> +
> + err = ip_local_out(skb);
> + if (unlikely(net_xmit_eval(err)))
> + pkt_len = 0;
> + return pkt_len;
> +}
> +
...
> diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
> index add17d9..1707eab 100644
> --- a/datapath/vport-gre.c
> +++ b/datapath/vport-gre.c
> @@ -24,50 +24,29 @@
> #include <linux/if_tunnel.h>
> #include <linux/if_vlan.h>
> #include <linux/in.h>
> +#include <linux/if_vlan.h>
> +#include <linux/in.h>
> +#include <linux/in_route.h>
> +#include <linux/inetdevice.h>
> +#include <linux/jhash.h>
> +#include <linux/list.h>
> +#include <linux/kernel.h>
> +#include <linux/workqueue.h>
> +#include <linux/rculist.h>
> +#include <net/route.h>
> +#include <net/xfrm.h>
> +
>
> #include <net/icmp.h>
> #include <net/ip.h>
> +#include <net/ip_tunnels.h>
> +#include <net/gre.h>
> #include <net/protocol.h>
>
> #include "datapath.h"
> #include "tunnel.h"
> #include "vport.h"
>
> -/*
> - * The GRE header is composed of a series of sections: a base and then a variable
> - * number of options.
> - */
> -#define GRE_HEADER_SECTION 4
> -
> -struct gre_base_hdr {
> - __be16 flags;
> - __be16 protocol;
> -};
> -
> -static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
> -{
> - int len = GRE_HEADER_SECTION;
> -
> - if (tun_key->tun_flags & OVS_TNL_F_KEY)
> - len += GRE_HEADER_SECTION;
> - if (tun_key->tun_flags & OVS_TNL_F_CSUM)
> - len += GRE_HEADER_SECTION;
> - return len;
> -}
> -
> -static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
> -{
> - /* Set key for GRE64 tunnels, even when key if is zero. */
> - int len = GRE_HEADER_SECTION + /* GRE Hdr */
> - GRE_HEADER_SECTION + /* GRE Key */
> - GRE_HEADER_SECTION; /* GRE SEQ */
> -
> - if (tun_key->tun_flags & OVS_TNL_F_CSUM)
> - len += GRE_HEADER_SECTION;
> -
> - return len;
> -}
> -
> /* Returns the least-significant 32 bits of a __be64. */
> static __be32 be64_get_low32(__be64 x)
> {
> @@ -78,61 +57,31 @@ static __be32 be64_get_low32(__be64 x)
> #endif
> }
>
> -static __be32 be64_get_high32(__be64 x)
> +static __be16 filter_tnl_flags(__be16 flags)
> {
> -#ifdef __BIG_ENDIAN
> - return (__force __be32)((__force u64)x >> 32);
> -#else
> - return (__force __be32)x;
> -#endif
> + return (flags & (TUNNEL_CSUM | TUNNEL_KEY));
> }
>
> -static void __gre_build_header(struct sk_buff *skb,
> - int tunnel_hlen,
> - bool is_gre64)
> +static struct sk_buff *__build_header(const struct vport *vport,
'vport' is not used, so it could be removed here.
> + struct sk_buff *skb,
> + int tunnel_hlen,
> + __be32 seq, __be16 gre64_flag)
> {
> const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
> - __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
> - - GRE_HEADER_SECTION);
> - struct gre_base_hdr *greh = (struct gre_base_hdr *) skb_transport_header(skb);
> - greh->protocol = htons(ETH_P_TEB);
> - greh->flags = 0;
> -
> - /* Work backwards over the options so the checksum is last. */
> - if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) {
> - greh->flags |= GRE_KEY;
> - if (is_gre64) {
> - /* Set higher 32 bits to seq. */
> - *options = be64_get_high32(tun_key->tun_id);
> - options--;
> - greh->flags |= GRE_SEQ;
> - }
> - *options = be64_get_low32(tun_key->tun_id);
> - options--;
> - }
> + struct tnl_ptk_info tpi;
>
> - if (tun_key->tun_flags & OVS_TNL_F_CSUM) {
> - greh->flags |= GRE_CSUM;
> - *options = 0;
> - *(__sum16 *)options = csum_fold(skb_checksum(skb,
> - skb_transport_offset(skb),
> - skb->len - skb_transport_offset(skb),
> - 0));
> - }
> -}
> + skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
> + if (IS_ERR(skb))
> + return NULL;
>
> -static void gre_build_header(const struct vport *vport,
> - struct sk_buff *skb,
> - int tunnel_hlen)
> -{
> - __gre_build_header(skb, tunnel_hlen, false);
> -}
> + tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag;
>
> -static void gre64_build_header(const struct vport *vport,
> - struct sk_buff *skb,
> - int tunnel_hlen)
> -{
> - __gre_build_header(skb, tunnel_hlen, true);
> + tpi.proto = htons(ETH_P_TEB);
> + tpi.key = be64_get_low32(tun_key->tun_id);
> + tpi.seq = seq;
> + gre_build_header(skb, &tpi, tunnel_hlen);
> +
> + return skb;
> }
>
More information about the dev
mailing list