[ovs-dev] [PATCH] datapath: Integration with upstream kernel tunneling.

Pravin Shelar pshelar at nicira.com
Thu Apr 4 17:42:49 UTC 2013


On Tue, Apr 2, 2013 at 6:09 AM, Rajahalme, Jarno (NSN - FI/Espoo) <
jarno.rajahalme at nsn.com> wrote:

>
> On Mar 30, 2013, at 18:20 , ext Pravin B Shelar wrote:
>
> > The following patch restructures OVS tunneling to make use of the kernel
> > API. By doing this, the tunneling code is simplified, as most of the
> > protocol processing on send and receive is pushed to kernel tunneling.
> > This way we can share most protocol-related code between openvswitch
> > tunneling and Linux tunnel devices.
>
> Maybe even more tunneling code could be shared between the UDP tunnel
> protocols,
> see below for this and some other comments.
>
>   Jarno
>
> ...
> > diff --git a/datapath/datapath.c b/datapath/datapath.c
> > index 9cd4b0e..4d9a3b0 100644
> > --- a/datapath/datapath.c
> > +++ b/datapath/datapath.c
> > @@ -49,8 +49,10 @@
> > #include <linux/rculist.h>
> > #include <linux/dmi.h>
> > #include <net/genetlink.h>
> > +#include <net/gre.h>
> > #include <net/net_namespace.h>
> > #include <net/netns/generic.h>
> > +#include <net/vxlan.h>
> >
> > #include "checksum.h"
> > #include "datapath.h"
> > @@ -61,7 +63,7 @@
> > #include "vport-internal_dev.h"
> >
> > #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
> > -    LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0)
> > +    LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
> > #error Kernels before 2.6.18 or after 3.8 are not supported by this
> version of Open vSwitch.
>
> The error text needs an update as well.
>
> ...
> > diff --git a/datapath/linux/compat/include/net/ip_tunnels.h
> b/datapath/linux/compat/include/net/ip_tunnels.h
> > new file mode 100644
> > index 0000000..16667e9
> > --- /dev/null
> > +++ b/datapath/linux/compat/include/net/ip_tunnels.h
> > @@ -0,0 +1,69 @@
> > +#ifndef __NET_IP_TUNNELS_WRAPPER_H
> > +#define __NET_IP_TUNNELS_WRAPPER_H 1
> > +
> > +#if LINUX_VERSION_CODE > KERNEL_VERSION(3,9,0)
> > +#include_next <net/ip_tunnels.h>
> > +#else
> > +#include <linux/if_tunnel.h>
> > +#include <linux/netdevice.h>
> > +#include <linux/skbuff.h>
> > +#include <linux/types.h>
> > +#include <net/dsfield.h>
> > +#include <net/inet_ecn.h>
> > +#include <net/ip.h>
> > +#include <net/rtnetlink.h>
> > +
> > +#if 0
> > +#include <net/ipv6.h>
> > +#include <net/ip6_fib.h>
> > +#include <net/ip6_route.h>
> > +#endif
> > +
>
> Is this #if 0 section still needed?
>

ok, I will remove it.


>
> > +#define TUNNEL_CSUM  __cpu_to_be16(0x01)
> > +#define TUNNEL_ROUTING       __cpu_to_be16(0x02)
> > +#define TUNNEL_KEY   __cpu_to_be16(0x04)
> > +#define TUNNEL_SEQ   __cpu_to_be16(0x08)
> > +#define TUNNEL_STRICT        __cpu_to_be16(0x10)
> > +#define TUNNEL_REC   __cpu_to_be16(0x20)
> > +#define TUNNEL_VERSION       __cpu_to_be16(0x40)
> > +#define TUNNEL_NO_KEY        __cpu_to_be16(0x80)
> > +
> > +struct tnl_ptk_info {
> > +     __be16 flags;
> > +     __be16 proto;
> > +     __be32 key;
> > +     __be32 seq;
> > +};
> > +
> > +#define PACKET_RCVD  0
> > +#define PACKET_REJECT        1
> > +
> > +static inline void tunnel_ip_select_ident(struct sk_buff *skb,
> > +                                       const struct iphdr  *old_iph,
> > +                                       struct dst_entry *dst)
> > +{
> > +     struct iphdr *iph = ip_hdr(skb);
> > +
> > +     /* Use inner packet iph-id if possible. */
> > +     if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
> > +             iph->id = old_iph->id;
>
> > +     else
> > +             __ip_select_ident(iph, dst,
> > +                               (skb_shinfo(skb)->gso_segs ?: 1) - 1);
> > +}
> > +
>
>
> This might be the wrong place to comment on this, since this is
> compatibility code, but the practice of using the id
> from the inner header is not completely safe. In the case where packets
> from multiple sources enter the same tunnel,
> it is possible for those packets to have the same id, which would then
> confuse the reassembler at the other end,
> since on reception the outer headers IP source and destination will be the
> same for all the packets received from the
> same tunnel.
>
> Such a collision should be very rare, so maybe it has been considered to
> be safe enough to warrant the saving on
> proper ID calculation.
>

I agree. I will send a separate patch for this, since this needs to be
discussed on the netdev list.


>
> > +static inline u16 tunnel_src_port(__u16 port_max, __u16 port_min,
> > +                               struct sk_buff *skb)
> > +{
> > +     unsigned int range = (port_max - port_min) + 1;
> > +     u32 hash;
> > +
> > +     hash = skb_get_rxhash(skb);
> > +     if (!hash)
> > +             hash = jhash(skb->data, 2 * ETH_ALEN,
> > +                          (__force u32) skb->protocol);
> > +
> > +     return (((u64) hash * range) >> 32) + port_min;
> > +}
> > +#endif
> > +#endif /* __NET_IP_TUNNELS_H */
> > diff --git a/datapath/linux/compat/include/net/vxlan.h
> b/datapath/linux/compat/include/net/vxlan.h
> > new file mode 100644
> > index 0000000..3ad26bb
> > --- /dev/null
> > +++ b/datapath/linux/compat/include/net/vxlan.h
> > @@ -0,0 +1,45 @@
> > +#ifndef __NET_IP_VXLAN_WRAPPER_H
> > +#define __NET_IP_VXLAN_WRAPPER_H 1
> > +
> > +#include <linux/skbuff.h>
> > +#include <linux/netdevice.h>
> > +#include <linux/udp.h>
> > +
> > +#if LINUX_VERSION_CODE > KERNEL_VERSION(3,9,0)
> > +#include_next <net/vxlan.h>
> > +
> > +static inline int vxlan_compat_init(void)
> > +{
> > +     return 0;
> > +}
> > +static inline void vxlan_compat_cleanup(void)
> > +{
> > +
> > +}
> > +#else
> > +
> > +/* VXLAN protocol header */
> > +struct vxlanhdr {
> > +     __be32 vx_flags;
> > +     __be32 vx_vni;
> > +};
> > +
> > +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
> > +
> > +struct vxlan_port {
> > +     int (*vx_rcv)(struct vxlan_port *port, struct sk_buff *skb, __be32
> key);
> > +     void *user_data;
> > +     struct socket *sock;
> > +     __be16 portno;
> > +};
>
> There is little vxlan specific in this structure (the key length, but even
> that is fairly common).
> It seems to me that this could be used for other UDP tunnels with 32-bit
> keys as well.
> This could be factored out and named like "udp_tunnel_port". The code
> using "vxlan_port" in
> vport-vxlan.c used to be the same as the equivalent for struct lisp_port
> in vport-lisp.c.
> In this patch vxlan and lisp diverge structurally, maybe due to vxlan
> being upstream while
> lisp is not.
> Anyway, if the same structures were used for both, upstreaming lisp later
> would be
> somewhat easier.
>
OK.


> > +
> > +struct sk_buff *vxlan_build_header(const struct vxlan_port *port,
> > +                                __u16 src_port, struct sk_buff *skb,
> > +                                __be32 vni);
> > +int vxlan_add_handler(struct net *net, struct vxlan_port *);
> > +void vxlan_del_handler(struct net *net, const struct vxlan_port *port);
> > +
> > +int vxlan_compat_init(void);
> > +void vxlan_compat_cleanup(void);
> > +#endif
> > +#endif
> > diff --git a/datapath/linux/compat/vxlan.c
> b/datapath/linux/compat/vxlan.c
> > new file mode 100644
> > index 0000000..26413c1
> > --- /dev/null
> > +++ b/datapath/linux/compat/vxlan.c
> > @@ -0,0 +1,311 @@
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/types.h>
> > +#include <linux/module.h>
> > +#include <linux/errno.h>
> > +#include <linux/slab.h>
> > +#include <linux/skbuff.h>
> > +#include <linux/rculist.h>
> > +#include <linux/netdevice.h>
> > +#include <linux/in.h>
> > +#include <linux/ip.h>
> > +#include <linux/udp.h>
> > +#include <linux/igmp.h>
> > +#include <linux/etherdevice.h>
> > +#include <linux/if_ether.h>
> > +#include <linux/hash.h>
> > +#include <linux/ethtool.h>
> > +#include <net/arp.h>
> > +#include <net/ndisc.h>
> > +#include <net/ip.h>
> > +#include <net/ip_tunnels.h>
> > +#include <net/icmp.h>
> > +#include <net/udp.h>
> > +#include <net/rtnetlink.h>
> > +#include <net/route.h>
> > +#include <net/dsfield.h>
> > +#include <net/inet_ecn.h>
> > +#include <net/net_namespace.h>
> > +#include <net/netns/generic.h>
> > +#include <net/vxlan.h>
> > +
> > +#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,9,0) && \
> > +    LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
> > +
> > +#define VXLAN_FLAGS 0x08000000       /* struct vxlanhdr.vx_flags
> required value. */
> > +
> > +static int vxlan_net_id;
> > +
> > +static DEFINE_MUTEX(vxlan_mutex);
> > +
> > +#define MAX_VXLAN_PORTS      8
> > +struct vxlan_net {
> > +     struct vxlan_port __rcu *vxlan_ports[MAX_VXLAN_PORTS];
> > +};
> > +
> > +static struct pernet_operations vxlan_net_ops = {
> > +     .id   = &vxlan_net_id,
> > +     .size = sizeof(struct vxlan_net),
> > +};
> > +static unsigned int port_range;
> > +static int port_low;
> > +static int port_high;
> > +
> > +int vxlan_compat_init(void)
> > +{
> > +     inet_get_local_port_range(&port_low, &port_high);
> > +     port_range = (port_high - port_low) + 1;
> > +
> > +     return register_pernet_device(&vxlan_net_ops);
> > +}
> > +
> > +void vxlan_compat_cleanup(void)
> > +{
> > +     unregister_pernet_device(&vxlan_net_ops);
> > +}
> > +
> > +/* Callback from net/ipv4/udp.c to receive packets */
> > +static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
> > +{
> > +     struct vxlan_net *vn = net_generic(dev_net(skb->dev),
> vxlan_net_id);
> > +     struct vxlanhdr *vxh;
> > +     int i;
> > +
> > +     /* pop off outer UDP header */
> > +     __skb_pull(skb, sizeof(struct udphdr));
> > +
> > +     /* Need Vxlan and inner Ethernet header to be present */
> > +     if (!pskb_may_pull(skb, sizeof(struct vxlanhdr)))
> > +             goto error;
>
> Maybe change the comment since the presence of the inner Ethernet header
> is not tested here?
>
OK, I will fix that comment.

> > +
> > +     /* Drop packets with reserved bits set */
> > +     vxh = (struct vxlanhdr *) skb->data;
> > +     if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
> > +         (vxh->vx_vni & htonl(0xff))) {
> > +             netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
> > +                        ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
> > +             goto error;
> > +     }
> > +
> > +     __skb_pull(skb, sizeof(struct vxlanhdr));
> > +
> > +     rcu_read_lock();
> > +     for (i = 0; i < MAX_VXLAN_PORTS; i++) {
> > +             struct vxlan_port *port =
> rcu_dereference(vn->vxlan_ports[i]);
> > +             int ret;
> > +
> > +             if (!port)
> > +                     continue;
> > +             if (port->portno != udp_hdr(skb)->dest)
> > +                     continue;
> > +
> > +             ret = port->vx_rcv(port, skb, vxh->vx_vni);
> > +             if (ret == PACKET_RCVD) {
> > +                     rcu_read_unlock();
> > +                     return 0;
> > +             }
> > +     }
> > +     rcu_read_unlock();
> > +error:
> > +     /*Put UDP header hack*/
> > +     __skb_push(skb, sizeof(struct udphdr));
> > +     return 1;
> > +}
> > +
> > +struct socket *vxlan_create_socket(struct net *net, __be16 portno)
> > +{
> > +     struct socket *sock;
> > +     struct sock *sk;
> > +     struct sockaddr_in vxlan_addr = {
> > +             .sin_family = AF_INET,
> > +             .sin_addr.s_addr = htonl(INADDR_ANY),
> > +     };
> > +     int rc;
> > +
> > +     /* Create UDP socket for encapsulation receive. */
> > +     rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
> > +     if (rc < 0) {
> > +             pr_debug("UDP socket create failed\n");
> > +             return ERR_PTR(rc);
> > +     }
> > +     /* Put in proper namespace */
> > +     sk = sock->sk;
> > +     sk_change_net(sk, net);
> > +
> > +     vxlan_addr.sin_port = portno;
> > +
> > +     rc = kernel_bind(sock, (struct sockaddr *) &vxlan_addr,
> > +                      sizeof(vxlan_addr));
> > +     if (rc < 0) {
> > +             pr_debug("bind for UDP socket %pI4:%u (%d)\n",
> > +                      &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port),
> rc);
> > +             sk_release_kernel(sk);
> > +             return ERR_PTR(rc);
> > +     }
> > +
> > +     /* Disable multicast loopback */
> > +     inet_sk(sk)->mc_loop = 0;
> > +
> > +     /* Mark socket as an encapsulation socket. */
> > +     udp_sk(sk)->encap_type = 1;
> > +     udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
> > +     udp_encap_enable();
> > +
> > +     return sock;
> > +}
>
> This looks like largely vxlan-independent function as well. If the
> encap_recv pointer would be
> an argument, then this same function could be used for lisp and other UDP
> tunnel protocols as well.
>
> The same seems to apply for most of the following functions. There is very
> little that is vxlan
> specific in them, and it seems like they could apply to any UDP tunnel
> protocols.
>
OK, I will move it to the UDP module.


> > +
> > +int vxlan_add_handler(struct net *net, struct vxlan_port *new)
> > +{
> ...
> > +}
> > +
> > +void vxlan_del_handler(struct net *net, const struct vxlan_port *del)
> > +{
> ...
> > +}
> > +
> > +static void vxlan_sock_free(struct sk_buff *skb)
> > +{
> > +     sock_put(skb->sk);
> > +}
> > +
> > +/* On transmit, associate with the tunnel socket */
> > +static void vxlan_set_owner(const struct vxlan_port *port, struct
> sk_buff *skb)
> > +{
> > +     struct sock *sk = port->sock->sk;
> > +
> > +     skb_orphan(skb);
> > +     sock_hold(sk);
> > +     skb->sk = sk;
> > +     skb->destructor = vxlan_sock_free;
> > +}
> > +
> > +#ifdef HAVE_SKB_GSO_UDP_TUNNEL
> > +static struct sk_buff *handle_offloads(struct sk_buff *skb)
> > +{
> > +     if (likely(!skb->encapsulation)) {
> > +             skb_reset_inner_headers(skb);
> > +             skb->encapsulation = 1;
> > +     }
> > +
> > +     if (skb_is_gso(skb)) {
> > +             int err = skb_unclone(skb, GFP_ATOMIC);
> > +             if (unlikely(err)) {
> > +                     kfree_skb(skb);
> > +                     return ERR_PTR(err);
> > +             }
> > +
> > +             skb_shinfo(skb)->gso_type |= (SKB_GSO_UDP_TUNNEL |
> SKB_GSO_UDP);
> > +     }
> > +
> > +     return skb;
> > +}
> > +#else
> > +static struct sk_buff *handle_offloads(struct sk_buff *skb)
> > +{
> > +     if (skb_is_gso(skb)) {
> > +             struct sk_buff *nskb;
> > +
> > +             nskb = __skb_gso_segment(skb, 0, false);
> > +             if (IS_ERR(nskb)) {
> > +                     kfree_skb(skb);
> > +                     return nskb;
> > +             }
> > +
> > +             consume_skb(skb);
> > +             skb = nskb;
> > +     }
> > +     return skb;
> > +}
> > +#endif
> > +
> ...
> > +#endif
> > diff --git a/datapath/tunnel.c b/datapath/tunnel.c
> > index 057aaed..bd0ee7e 100644
> > --- a/datapath/tunnel.c
> > +++ b/datapath/tunnel.c
> > @@ -28,6 +28,7 @@
> ...
> > -int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
> > +int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, u8 ipproto,
> > +              int tunnel_hlen,
> > +              struct sk_buff *(*build_header)(const struct vport *,
> > +                                              struct sk_buff *,
> > +                                              int tunnel_hlen))
> > {
> > -     struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
> > +     struct iphdr *inner_iph;
> > +     struct iphdr *iph;
> >       struct rtable *rt;
> >       __be32 saddr;
> >       int sent_len = 0;
> > -     int tunnel_hlen;
> > +     int min_headroom;
> >
> >       if (unlikely(!OVS_CB(skb)->tun_key))
> >               goto error_free;
> >
> > +     if (unlikely(vlan_deaccel_tag(skb)))
> > +             goto error;
> > +
> >       /* Route lookup */
> >       saddr = OVS_CB(skb)->tun_key->ipv4_src;
> >       rt = find_route(ovs_dp_get_net(vport->dp),
> >                       &saddr,
> >                       OVS_CB(skb)->tun_key->ipv4_dst,
> > -                     tnl_vport->tnl_ops->ipproto,
> > +                     ipproto,
> >                       OVS_CB(skb)->tun_key->ipv4_tos,
> >                       skb_get_mark(skb));
> >       if (IS_ERR(rt))
> >               goto error_free;
> >
> > +     skb_dst_drop(skb);
> > +     skb_dst_set(skb, &rt_dst(rt));
> > +
> >       /* Offloading */
> > -     tunnel_hlen = tnl_vport->tnl_ops->hdr_len(OVS_CB(skb)->tun_key);
> > -     tunnel_hlen += sizeof(struct iphdr);
> > +     min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) +
> rt_dst(rt).header_len
> > +                     + tunnel_hlen + sizeof(struct iphdr)
> > +                     + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
> >
> > -     skb = handle_offloads(skb, rt, tunnel_hlen);
> > -     if (IS_ERR(skb)) {
> > -             skb = NULL;
> > -             goto err_free_rt;
> > +     if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
> > +             int err;
> > +             int head_delta = SKB_DATA_ALIGN(min_headroom -
> > +                                             skb_headroom(skb) +
> > +                                             16);
> > +             err = pskb_expand_head(skb, max_t(int, head_delta, 0),
> > +                                     0, GFP_ATOMIC);
> > +             if (unlikely(err))
> > +                     goto error_free;
> >       }
> > +     forward_ip_summed(skb, true);
> > +
> > +     /* Push Tunnel header. */
> > +     skb = build_header(vport, skb, tunnel_hlen);
> > +     if (!skb)
> > +             goto error;
> > +
> > +     inner_iph = ip_hdr(skb);
> >
> >       /* Reset SKB */
> >       nf_reset(skb);
> >       secpath_reset(skb);
> > -     skb_dst_drop(skb);
> >       skb_clear_rxhash(skb);
> >
> > -     while (skb) {
> > -             struct sk_buff *next_skb = skb->next;
> > -             struct iphdr *iph;
> > -             int frag_len;
> > -             int err;
> > -
> > -             skb->next = NULL;
> > -
> > -             if (unlikely(vlan_deaccel_tag(skb)))
> > -                     goto next;
> > -
> > -             frag_len = skb->len;
> > -             skb_push(skb, tunnel_hlen);
> > -             skb_reset_network_header(skb);
> > -             skb_set_transport_header(skb, sizeof(struct iphdr));
> > -
> > -             if (next_skb)
> > -                     skb_dst_set(skb, dst_clone(&rt_dst(rt)));
> > -             else
> > -                     skb_dst_set(skb, &rt_dst(rt));
> > -
> > -             /* Push Tunnel header. */
> > -             tnl_vport->tnl_ops->build_header(vport, skb, tunnel_hlen);
> > -
> > -             /* Push IP header. */
> > -             iph = ip_hdr(skb);
> > -             iph->version    = 4;
> > -             iph->ihl        = sizeof(struct iphdr) >> 2;
> > -             iph->protocol   = tnl_vport->tnl_ops->ipproto;
> > -             iph->daddr      = OVS_CB(skb)->tun_key->ipv4_dst;
> > -             iph->saddr      = saddr;
> > -             iph->tos        = OVS_CB(skb)->tun_key->ipv4_tos;
> > -             iph->ttl        = OVS_CB(skb)->tun_key->ipv4_ttl;
> > -             iph->frag_off   = OVS_CB(skb)->tun_key->tun_flags &
> > -                               OVS_TNL_F_DONT_FRAGMENT ?  htons(IP_DF)
> : 0;
> > -             /*
> > -              * Allow our local IP stack to fragment the outer packet
> even
> > -              * if the DF bit is set as a last resort.  We also need to
> > -              * force selection of an IP ID here with
> __ip_select_ident(),
> > -              * as ip_select_ident() assumes a proper ID is not needed
> when
> > -              * when the DF bit is set.
> > -              */
> > -             skb->local_df = 1;
> > -             __ip_select_ident(iph, skb_dst(skb), 0);
> > -
> > -             memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
> > -
> > -             err = ip_local_out(skb);
> > -             if (unlikely(net_xmit_eval(err)))
> > -                     goto next;
> > -
> > -             sent_len += frag_len;
> > +     skb_push(skb, sizeof(struct iphdr));
> > +     skb_reset_network_header(skb);
> > +     skb_set_transport_header(skb, sizeof(struct iphdr));
> > +
> > +     /* Push IP header. */
> > +     iph = ip_hdr(skb);
> > +     iph->version    = 4;
> > +     iph->ihl        = sizeof(struct iphdr) >> 2;
> > +     iph->protocol   = ipproto;
> > +     iph->daddr      = OVS_CB(skb)->tun_key->ipv4_dst;
> > +     iph->saddr      = saddr;
> > +     iph->tos        = OVS_CB(skb)->tun_key->ipv4_tos;
> > +     iph->ttl        = OVS_CB(skb)->tun_key->ipv4_ttl;
> > +     iph->frag_off   = OVS_CB(skb)->tun_key->tun_flags &
> > +
> > +                             OVS_TNL_F_DONT_FRAGMENT ?  htons(IP_DF) :
> 0;
> > +     /*
> > +      * Allow our local IP stack to fragment the outer packet even
> > +      * if the DF bit is set as a last resort.  We also need to
> > +      * force selection of an IP ID here with __ip_select_ident(),
> > +      * as ip_select_ident() assumes a proper ID is not needed when
> > +      * when the DF bit is set.
> > +      */
> > +     skb->local_df = 1;
> > +     __ip_select_ident(iph, skb_dst(skb), 0);
> >
>
> This call to __ip_select_ident() is now redundant, as
> tunnel_ip_select_ident() is called below.
>
Right.


> > -next:
> > -             skb = next_skb;
> > -     }
> > +     tunnel_ip_select_ident(skb, inner_iph, &rt_dst(rt));
> > +     sent_len = tnl_send_out(vport, skb, tunnel_hlen, build_header);
> >
> >       if (unlikely(sent_len == 0))
> >               ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
> >
> >       return sent_len;
> >
> > -err_free_rt:
> > -     ip_rt_put(rt);
> > error_free:
> >       kfree_skb(skb);
> > +error:
> >       ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
> >       return sent_len;
> > }
> >
> ...
> > diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
> > index 1fff5ae..8d9e232 100644
> > --- a/datapath/vport-lisp.c
> > +++ b/datapath/vport-lisp.c
> > @@ -30,6 +30,7 @@
> >
> ...
> >
> > -static void lisp_build_header(const struct vport *vport,
> > -                           struct sk_buff *skb,
> > -                           int tunnel_hlen)
> > +static struct sk_buff *lisp_build_header(const struct vport *vport,
> > +                                      struct sk_buff *skb,
> > +                                      int tunnel_hlen)
> > {
> > -     struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
> > -     struct udphdr *udph = udp_hdr(skb);
> > -     struct lisphdr *lisph = (struct lisphdr *)(udph + 1);
> > +     struct lisp_port *lisp_port = lisp_vport_priv(vport);
> > +     struct udphdr *udph;
> > +     struct lisphdr *lisph;
> >       const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
> >
> > -     udph->dest = tnl_vport->dst_port;
> > -     udph->source = htons(ovs_tnl_get_src_port(skb));
> > +     if (skb_is_gso(skb)) {
> > +             struct sk_buff *nskb;
> > +
> > +             nskb = __skb_gso_segment(skb, 0, false);
> > +             if (IS_ERR(nskb)) {
> > +                     kfree_skb(skb);
> > +                     return nskb;
> > +             }
> > +
>
> Could GSO UDP TUNNEL support be used here as well?
>
Good idea. I will fix it.


> > +             consume_skb(skb);
> > +             skb = nskb;
> > +     }
> > +
> > +     skb_push(skb, LISP_HLEN);
> > +     udph = (struct udphdr *) skb->data;
> > +     lisph = (struct lisphdr *)(udph + 1);
> > +
> > +     udph->dest = lisp_port->dst_port;
> > +     udph->source = htons(ovs_tunnel_source_port(skb));
> >       udph->check = 0;
> > -     udph->len = htons(skb->len - skb_transport_offset(skb));
> > +     udph->len = htons(skb->len);
> >
> >       lisph->nonce_present = 0;       /* We don't support echo nonce
> algorithm */
> >       lisph->locator_status_bits_present = 1; /* Set LSB */
> > @@ -207,6 +206,28 @@ static void lisp_build_header(const struct vport
> *vport,
> >
> >       tunnel_id_to_instance_id(tun_key->tun_id,
> &lisph->u2.word2.instance_id[0]);
> >       lisph->u2.word2.locator_status_bits = 1;
> > +
> > +     return skb;
> > +}
> > +
> ...
> > --
> > 1.7.1
> >
> > _______________________________________________
> > dev mailing list
> > dev at openvswitch.org
> > http://openvswitch.org/mailman/listinfo/dev
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20130404/8dfef3ff/attachment-0003.html>


More information about the dev mailing list