[ovs-dev] [PATCH v6 3/3] datapath: add layer 3 flow/port support

Pravin Shelar pshelar at nicira.com
Fri Nov 7 06:50:33 UTC 2014


On Thu, Nov 6, 2014 at 12:21 PM, Lori Jakab <lojakab at cisco.com> wrote:
> On 11/6/14 4:06 AM, Pravin Shelar wrote:
>>>>>
>>>>> +static int push_eth(struct sk_buff *skb, const struct
>>>>> ovs_action_push_eth *ethh)
>>>>> +{
>>>>> +       /* De-accelerate any hardware accelerated VLAN tag added to a
>>>>> previous
>>>>> +        * Ethernet header */
>>>>> +       if (unlikely(vlan_tx_tag_present(skb))) {
>>>>> +               int err;
>>>>> +               err = deaccel_vlan_tx_tag(skb);
>>>>> +               if (unlikely(err))
>>>>> +                       return err;
>>>>> +       }
>>>>> +
>>>>> +       /* Add the new Ethernet header */
>>>>> +       if (skb_cow_head(skb, ETH_HLEN) < 0)
>>>>> +               return -ENOMEM;
>>>>> +
>>>>> +       skb_push(skb, ETH_HLEN);
>>>>> +       skb_reset_mac_header(skb);
>>>>> +       skb_reset_mac_len(skb);
>>>>> +
>>>>> +       ether_addr_copy(eth_hdr(skb)->h_source,
>>>>> ethh->addresses.eth_src);
>>>>> +       ether_addr_copy(eth_hdr(skb)->h_dest, ethh->addresses.eth_dst);
>>>>> +       eth_hdr(skb)->h_proto = ethh->eth_type;
>>>>> +
>>>>> +       ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
>>>>> +
>>>>> +       skb->protocol = ethh->eth_type;
>>>>> +       return 0;
>>>>> +}
>>>>> +
>>
>> eth push and pop both are changing packet, therefore flow key is no
>> longer valid, so we need to invalidate flow key.
>
>
> Will call invalidate_flow_key() before return in each one.
Yes, or you can update flow key.

>
>
>>
>>>>>    static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
>>>>>                           __be32 *addr, __be32 new_addr)
>>>>>    {
>>>>> @@ -896,6 +945,14 @@ static int do_execute_actions(struct datapath *dp,
>>>>> struct sk_buff *skb,
>>>>>                           err = pop_vlan(skb, key);
>>>>>                           break;
>>>>>
>>>>> +               case OVS_ACTION_ATTR_PUSH_ETH:
>>>>> +                       err = push_eth(skb, nla_data(a));
>>>>> +                       break;
>>>>> +
>>>>> +               case OVS_ACTION_ATTR_POP_ETH:
>>>>> +                       err = pop_eth(skb);
>>>>> +                       break;
>>>>> +
>>>>>                   case OVS_ACTION_ATTR_RECIRC:
>>>>>                           err = execute_recirc(dp, skb, key, a, rem);
>>>>>                           if (last_action(a, rem)) {
>>>>> diff --git a/datapath/flow.c b/datapath/flow.c
>>>>> index a3c5d2f..f54e361 100644
>>>>> --- a/datapath/flow.c
>>>>> +++ b/datapath/flow.c
>>>>> @@ -458,28 +458,30 @@ static int key_extract(struct sk_buff *skb,
>>>>> struct
>>>>> sw_flow_key *key)
>>>>>
>>>>>           skb_reset_mac_header(skb);
>>>>>
>>>>> -       /* Link layer.  We are guaranteed to have at least the 14 byte
>>>>> Ethernet
>>>>> -        * header in the linear data area.
>>>>> -        */
>>>>> -       eth = eth_hdr(skb);
>>>>> -       ether_addr_copy(key->eth.src, eth->h_source);
>>>>> -       ether_addr_copy(key->eth.dst, eth->h_dest);
>>>>> +       /* Link layer. */
>>>>> +       if (key->phy.is_layer3) {
>>>>> +               key->eth.type = skb->protocol;
>>>>> +       } else {
>>>>> +               eth = eth_hdr(skb);
>>>>> +               ether_addr_copy(key->eth.src, eth->h_source);
>>>>> +               ether_addr_copy(key->eth.dst, eth->h_dest);
>>>>>
>>>>> -       __skb_pull(skb, 2 * ETH_ALEN);
>>>>> -       /* We are going to push all headers that we pull, so no need to
>>>>> -        * update skb->csum here.
>>>>> -        */
>>>>> +               __skb_pull(skb, 2 * ETH_ALEN);
>>>>> +               /* We are going to push all headers that we pull, so no
>>>>> need to
>>>>> +                * update skb->csum here.
>>>>> +                */
>>>>>
>>>>> -       key->eth.tci = 0;
>>>>> -       if (vlan_tx_tag_present(skb))
>>>>> -               key->eth.tci = htons(vlan_get_tci(skb));
>>>>> -       else if (eth->h_proto == htons(ETH_P_8021Q))
>>>>> -               if (unlikely(parse_vlan(skb, key)))
>>>>> -                       return -ENOMEM;
>>>>> +               key->eth.tci = 0;
>>>>> +               if (vlan_tx_tag_present(skb))
>>>>> +                       key->eth.tci = htons(vlan_get_tci(skb));
>>>>> +               else if (eth->h_proto == htons(ETH_P_8021Q))
>>>>> +                       if (unlikely(parse_vlan(skb, key)))
>>>>> +                               return -ENOMEM;
>>>>>
>>>>> -       key->eth.type = parse_ethertype(skb);
>>>>> -       if (unlikely(key->eth.type == htons(0)))
>>>>> -               return -ENOMEM;
>>>>> +               key->eth.type = parse_ethertype(skb);
>>>>> +               if (unlikely(key->eth.type == htons(0)))
>>>>> +                       return -ENOMEM;
>>>>> +       }
>>>>>
>>>>>           skb_reset_network_header(skb);
>>>>>           skb_reset_mac_len(skb);
>>>>> @@ -682,7 +684,8 @@ int ovs_flow_key_update(struct sk_buff *skb, struct
>>>>> sw_flow_key *key)
>>>>>
>>>>>    int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
>>>>>                            struct sk_buff *skb,
>>>>> -                        struct sw_flow_key *key)
>>>>> +                        struct sw_flow_key *key,
>>>>> +                        bool is_layer3)
>>>>>    {
>>>>>           /* Extract metadata from packet. */
>>>>>           if (tun_info) {
>>>>> @@ -706,6 +709,7 @@ int ovs_flow_key_extract(const struct
>>>>> ovs_tunnel_info
>>>>> *tun_info,
>>>>>           key->phy.priority = skb->priority;
>>>>>           key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
>>>>>           key->phy.skb_mark = skb->mark;
>>>>> +       key->phy.is_layer3 = is_layer3;
>>>>>           key->ovs_flow_hash = 0;
>>>>>           key->recirc_id = 0;
>>>>>
>>>>> diff --git a/datapath/flow.h b/datapath/flow.h
>>>>> index c78b864..fbbc6f1 100644
>>>>> --- a/datapath/flow.h
>>>>> +++ b/datapath/flow.h
>>>>> @@ -130,6 +130,7 @@ struct sw_flow_key {
>>>>>                   u32     priority;       /* Packet QoS priority. */
>>>>>                   u32     skb_mark;       /* SKB mark. */
>>>>>                   u16     in_port;        /* Input switch port (or
>>>>> DP_MAX_PORTS). */
>>>>> +               bool    is_layer3;      /* Packet has no Ethernet
>>>>> header
>>>>> */
>>>>>           } __packed phy; /* Safe when right after 'tun_key'. */
>>>>>           u32 ovs_flow_hash;              /* Datapath computed hash
>>>>> value.
>>>>> */
>>>>>           u32 recirc_id;                  /* Recirculation ID.  */
>>>>> @@ -253,7 +254,8 @@ void ovs_flow_stats_clear(struct sw_flow *);
>>>>>    u64 ovs_flow_used_time(unsigned long flow_jiffies);
>>>>>
>>>>>    int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
>>>>> -                        struct sk_buff *skb, struct sw_flow_key *key);
>>>>> +                        struct sk_buff *skb, struct sw_flow_key *key,
>>>>> +                        bool is_layer3);
>>>>>    /* Extract key from packet coming from userspace. */
>>>>>    int ovs_flow_key_extract_userspace(const struct nlattr *attr,
>>>>>                                      struct sk_buff *skb,
>>>>> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
>>>>> index 37b0bdd..17e239b 100644
>>>>> --- a/datapath/flow_netlink.c
>>>>> +++ b/datapath/flow_netlink.c
>>>>> @@ -114,7 +114,7 @@ static void update_range(struct sw_flow_match
>>>>> *match,
>>>>>    static bool match_validate(const struct sw_flow_match *match,
>>>>>                              u64 key_attrs, u64 mask_attrs, bool log)
>>>>>    {
>>>>> -       u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
>>>>> +       u64 key_expected = 0;
>>>>>           u64 mask_allowed = key_attrs;  /* At most allow all key
>>>>> attributes */
>>>>>
>>>>>           /* The following mask attributes allowed only if they
>>>>> @@ -137,6 +137,10 @@ static bool match_validate(const struct
>>>>> sw_flow_match *match,
>>>>>                          | (1ULL << OVS_KEY_ATTR_IN_PORT)
>>>>>                          | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
>>>>>
>>>>> +       /* If Ethertype is present, expect MAC addresses */
>>>>> +       if (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))
>>>>> +               key_expected |= 1ULL << OVS_KEY_ATTR_ETHERNET;
>>>>> +
>>>>>           /* Check key attributes. */
>>>>>           if (match->key->eth.type == htons(ETH_P_ARP)
>>>>>                           || match->key->eth.type == htons(ETH_P_RARP))
>>>>> {
>>>>> @@ -685,6 +689,15 @@ static int metadata_from_nlattrs(struct
>>>>> sw_flow_match *match,  u64 *attrs,
>>>>>                           return -EINVAL;
>>>>>                   *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
>>>>>           }
>>>>> +       if (is_mask)
>>>>> +               /* Always exact match is_layer3 */
>>>>> +               SW_FLOW_KEY_PUT(match, phy.is_layer3, true, is_mask);
>>>>> +       else {
>>>>> +               if (*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET))
>>>>> +                       SW_FLOW_KEY_PUT(match, phy.is_layer3, false,
>>>>> is_mask);
>>>>> +               else
>>>>> +                       SW_FLOW_KEY_PUT(match, phy.is_layer3, true,
>>>>> is_mask);
>>>>> +       }
>>>>>           return 0;
>>>>>    }
>>>>>
>>>>> @@ -751,6 +764,17 @@ static int ovs_key_from_nlattrs(struct
>>>>> sw_flow_match
>>>>> *match, u64 attrs,
>>>>>           if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
>>>>>                   const struct ovs_key_ipv4 *ipv4_key;
>>>>>
>>>>> +               /* Add eth.type value for layer 3 flows */
>>>>> +               if (!(attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
>>>>> +                       __be16 eth_type;
>>>>> +
>>>>> +                       if (is_mask)
>>>>> +                               eth_type = htons(0xffff);
>>>>> +                       else
>>>>> +                               eth_type = htons(ETH_P_IP);
>>>>> +                       SW_FLOW_KEY_PUT(match, eth.type, eth_type,
>>>>> is_mask);
>>>>> +               }
>>>>> +
>>>>>                   ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
>>>>>                   if (!is_mask && ipv4_key->ipv4_frag >
>>>>> OVS_FRAG_TYPE_MAX)
>>>>> {
>>>>>                           OVS_NLERR(log,
>>>>> @@ -776,6 +800,18 @@ static int ovs_key_from_nlattrs(struct
>>>>> sw_flow_match
>>>>> *match, u64 attrs,
>>>>>           if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
>>>>>                   const struct ovs_key_ipv6 *ipv6_key;
>>>>>
>>>>> +               /* Add eth.type value for layer 3 flows */
>>>>> +               if (!(attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) {
>>>>> +                       __be16 eth_type;
>>>>> +
>>>>> +                       if (is_mask) {
>>>>> +                               eth_type = htons(0xffff);
>>>>> +                       } else {
>>>>> +                               eth_type = htons(ETH_P_IPV6);
>>>>> +                       }
>>>>> +                       SW_FLOW_KEY_PUT(match, eth.type, eth_type,
>>>>> is_mask);
>>>>> +               }
>>>>> +
>>>>>                   ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
>>>>>                   if (!is_mask && ipv6_key->ipv6_frag >
>>>>> OVS_FRAG_TYPE_MAX)
>>>>> {
>>>>>                           OVS_NLERR(log,
>>>>> @@ -1151,7 +1187,7 @@ int ovs_nla_put_flow(const struct sw_flow_key
>>>>> *swkey,
>>>>>                        const struct sw_flow_key *output, struct sk_buff
>>>>> *skb)
>>>>>    {
>>>>>           struct ovs_key_ethernet *eth_key;
>>>>> -       struct nlattr *nla, *encap;
>>>>> +       struct nlattr *nla, *encap = NULL;
>>>>>           bool is_mask = (swkey != output);
>>>>>
>>>>>           if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH,
>>>>> output->ovs_flow_hash))
>>>>> @@ -1190,6 +1226,9 @@ int ovs_nla_put_flow(const struct sw_flow_key
>>>>> *swkey,
>>>>>           if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK,
>>>>> output->phy.skb_mark))
>>>>>                   goto nla_put_failure;
>>>>>
>>>>> +       if (swkey->phy.is_layer3)
>>>>> +               goto noethernet;
>>>>> +
>>>>>           nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET,
>>>>> sizeof(*eth_key));
>>>>>           if (!nla)
>>>>>                   goto nla_put_failure;
>>>>> @@ -1207,8 +1246,7 @@ int ovs_nla_put_flow(const struct sw_flow_key
>>>>> *swkey,
>>>>>                   encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
>>>>>                   if (!swkey->eth.tci)
>>>>>                           goto unencap;
>>>>> -       } else
>>>>> -               encap = NULL;
>>>>> +       }
>>>>>
>>>>>           if (swkey->eth.type == htons(ETH_P_802_2)) {
>>>>>                   /*
>>>>> @@ -1227,6 +1265,7 @@ int ovs_nla_put_flow(const struct sw_flow_key
>>>>> *swkey,
>>>>>           if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
>>>>> output->eth.type))
>>>>>                   goto nla_put_failure;
>>>>>
>>>>> +noethernet:
>>>>>           if (swkey->eth.type == htons(ETH_P_IP)) {
>>>>>                   struct ovs_key_ipv4 *ipv4_key;
>>>>>
>>>>> @@ -1640,7 +1679,8 @@ static int validate_and_copy_set_tun(const struct
>>>>> nlattr *attr,
>>>>>    static int validate_set(const struct nlattr *a,
>>>>>                           const struct sw_flow_key *flow_key,
>>>>>                           struct sw_flow_actions **sfa,
>>>>> -                       bool *set_tun, __be16 eth_type, bool log)
>>>>> +                       bool *set_tun, __be16 eth_type, bool log,
>>>>> +                       bool is_layer3)
>>>>>    {
>>>>>           const struct nlattr *ovs_key = nla_data(a);
>>>>>           int key_type = nla_type(ovs_key);
>>>>> @@ -1661,7 +1701,11 @@ static int validate_set(const struct nlattr *a,
>>>>>
>>>>>           case OVS_KEY_ATTR_PRIORITY:
>>>>>           case OVS_KEY_ATTR_SKB_MARK:
>>>>> +               break;
>>>>> +
>>>>>           case OVS_KEY_ATTR_ETHERNET:
>>>>> +               if (is_layer3)
>>>>> +                       return -EINVAL;
>>>>>                   break;
>>>>>
>>>> Why similar checks are not done for MPLS actions?
>>>
>>>
>>> Omission on my part, will fix in v7.
>>>
>>>
>>>>>           case OVS_KEY_ATTR_TUNNEL:
>>>>> @@ -1779,6 +1823,7 @@ static int __ovs_nla_copy_actions(const struct
>>>>> nlattr *attr,
>>>>>    {
>>>>>           const struct nlattr *a;
>>>>>           int rem, err;
>>>>> +       bool is_layer3 = key->phy.is_layer3;
>>>>>
>>>>>           if (depth >= SAMPLE_ACTION_DEPTH)
>>>>>                   return -EOVERFLOW;
>>>>> @@ -1789,6 +1834,8 @@ static int __ovs_nla_copy_actions(const struct
>>>>> nlattr *attr,
>>>>>                           [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
>>>>>                           [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
>>>>>                           [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
>>>>> +                       [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct
>>>>> ovs_action_push_eth),
>>>>> +                       [OVS_ACTION_ATTR_POP_ETH] = 0,
>>>>>                           [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct
>>>>> ovs_action_push_mpls),
>>>>>                           [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
>>>>>                           [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct
>>>>> ovs_action_push_vlan),
>>>>> @@ -1835,11 +1882,31 @@ static int __ovs_nla_copy_actions(const struct
>>>>> nlattr *attr,
>>>>>                           break;
>>>>>                   }
>>>>>
>>>>> +               case OVS_ACTION_ATTR_POP_ETH:
>>>>> +                       if (is_layer3)
>>>>> +                               return -EINVAL;
>>>>> +                       if (vlan_tci & htons(VLAN_TAG_PRESENT))
>>>>> +                               return -EINVAL;
>>>>> +                       is_layer3 = true;
>>>>> +                       break;
>>>>> +
>>>>> +               case OVS_ACTION_ATTR_PUSH_ETH:
>>>>> +                       /* For now disallow pushing an Ethernet header
>>>>> if
>>>>> one
>>>>> +                        * is already present */
>>>>> +                       if (!is_layer3)
>>>>> +                               return -EINVAL;
>>>>> +                       is_layer3 = false;
>>>>> +                       break;
>>>>> +
>>>>>                   case OVS_ACTION_ATTR_POP_VLAN:
>>>>> +                       if (is_layer3)
>>>>> +                               return -EINVAL;
>>>>>                           vlan_tci = htons(0);
>>>>>                           break;
>>>>>
>>>>>                   case OVS_ACTION_ATTR_PUSH_VLAN:
>>>>> +                       if (is_layer3)
>>>>> +                               return -EINVAL;
>>>>>                           vlan = nla_data(a);
>>>>>                           if (vlan->vlan_tpid != htons(ETH_P_8021Q))
>>>>>                                   return -EINVAL;
>>>>> @@ -1889,7 +1956,7 @@ static int __ovs_nla_copy_actions(const struct
>>>>> nlattr *attr,
>>>>>
>>>>>                   case OVS_ACTION_ATTR_SET:
>>>>>                           err = validate_set(a, key, sfa, &skip_copy,
>>>>> eth_type,
>>>>> -                                          log);
>>>>> +                                          log, is_layer3);
>>>>>                           if (err)
>>>>>                                   return err;
>>>>>                           break;
>>>>> diff --git a/datapath/vport-geneve.c b/datapath/vport-geneve.c
>>>>> index 7c08577..75248cc 100644
>>>>> --- a/datapath/vport-geneve.c
>>>>> +++ b/datapath/vport-geneve.c
>>>>> @@ -200,7 +200,7 @@ static int geneve_rcv(struct sock *sk, struct
>>>>> sk_buff
>>>>> *skb)
>>>>>                                   key, flags,
>>>>>                                   geneveh->options, opts_len);
>>>>>
>>>>> -       ovs_vport_receive(vport_from_priv(geneve_port), skb,
>>>>> &tun_info);
>>>>> +       ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info,
>>>>> false);
>>>>>           goto out;
>>>>>
>>>>>    error:
>>>>> diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
>>>>> index 41c025d..ffb69d1 100644
>>>>> --- a/datapath/vport-gre.c
>>>>> +++ b/datapath/vport-gre.c
>>>>> @@ -114,7 +114,7 @@ static int gre_rcv(struct sk_buff *skb,
>>>>>           ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
>>>>>                                  filter_tnl_flags(tpi->flags), NULL,
>>>>> 0);
>>>>>
>>>>> -       ovs_vport_receive(vport, skb, &tun_info);
>>>>> +       ovs_vport_receive(vport, skb, &tun_info, false);
>>>>>           return PACKET_RCVD;
>>>>>    }
>>>>>
>>>>> diff --git a/datapath/vport-internal_dev.c
>>>>> b/datapath/vport-internal_dev.c
>>>>> index a1c4949..faf8378 100644
>>>>> --- a/datapath/vport-internal_dev.c
>>>>> +++ b/datapath/vport-internal_dev.c
>>>>> @@ -77,7 +77,7 @@ static struct net_device_stats
>>>>> *internal_dev_sys_stats(struct net_device *netdev
>>>>>    static int internal_dev_xmit(struct sk_buff *skb, struct net_device
>>>>> *netdev)
>>>>>    {
>>>>>           rcu_read_lock();
>>>>> -       ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
>>>>> +       ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL,
>>>>> false);
>>>>>           rcu_read_unlock();
>>>>>           return 0;
>>>>>    }
>>>>> diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
>>>>> index f3d450f..4f35c33 100644
>>>>> --- a/datapath/vport-lisp.c
>>>>> +++ b/datapath/vport-lisp.c
>>>>> @@ -232,8 +232,6 @@ static int lisp_rcv(struct sock *sk, struct sk_buff
>>>>> *skb)
>>>>>           struct iphdr *iph, *inner_iph;
>>>>>           struct ovs_tunnel_info tun_info;
>>>>>           __be64 key;
>>>>> -       struct ethhdr *ethh;
>>>>> -       __be16 protocol;
>>>>>
>>>>>           lisp_port = lisp_find_port(dev_net(skb->dev),
>>>>> udp_hdr(skb)->dest);
>>>>>           if (unlikely(!lisp_port))
>>>>> @@ -259,26 +257,16 @@ static int lisp_rcv(struct sock *sk, struct
>>>>> sk_buff
>>>>> *skb)
>>>>>           inner_iph = (struct iphdr *)(lisph + 1);
>>>>>           switch (inner_iph->version) {
>>>>>           case 4:
>>>>> -               protocol = htons(ETH_P_IP);
>>>>> +               skb->protocol = htons(ETH_P_IP);
>>>>>                   break;
>>>>>           case 6:
>>>>> -               protocol = htons(ETH_P_IPV6);
>>>>> +               skb->protocol = htons(ETH_P_IPV6);
>>>>>                   break;
>>>>>           default:
>>>>>                   goto error;
>>>>>           }
>>>>> -       skb->protocol = protocol;
>>>>>
>>>>> -       /* Add Ethernet header */
>>>>> -       ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
>>>>> -       memset(ethh, 0, ETH_HLEN);
>>>>> -       ethh->h_dest[0] = 0x02;
>>>>> -       ethh->h_source[0] = 0x02;
>>>>> -       ethh->h_proto = protocol;
>>>>> -
>>>>> -       ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
>>>>> -
>>>>> -       ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_info);
>>>>> +       ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_info,
>>>>> true);
>>>>>           goto out;
>>>>>
>>>>>    error:
>>>>> @@ -452,8 +440,9 @@ static int lisp_send(struct vport *vport, struct
>>>>> sk_buff *skb)
>>>>>
>>>>>           tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
>>>>>
>>>>> -       if (skb->protocol != htons(ETH_P_IP) &&
>>>>> -           skb->protocol != htons(ETH_P_IPV6)) {
>>>>> +       if ((skb->protocol != htons(ETH_P_IP) &&
>>>>> +           skb->protocol != htons(ETH_P_IPV6)) ||
>>>>> +           vlan_tx_tag_present(skb)) {
>>>>>                   kfree_skb(skb);
>>>>>                   return 0;
>>>>>           }
>>
>> We also need to reject layer3 packet in other vport implementations
>> which do not support such packets.
>
>
> Righ, will add that in v7.
>
>
>>
>>
>>>>> @@ -483,11 +472,6 @@ static int lisp_send(struct vport *vport, struct
>>>>> sk_buff *skb)
>>>>>                           goto err_free_rt;
>>>>>           }
>>>>>
>>>>> -       /* Reset l2 headers. */
>>>>> -       skb_pull(skb, network_offset);
>>>>> -       skb_reset_mac_header(skb);
>>>>> -       vlan_set_tci(skb, 0);
>>>>> -
>>>>>           skb_reset_inner_headers(skb);
>>>>>
>>>>>           __skb_push(skb, LISP_HLEN);
>>>>> diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
>>>>> index 9c0908a..72bba4e 100644
>>>>> --- a/datapath/vport-netdev.c
>>>>> +++ b/datapath/vport-netdev.c
>>>>> @@ -211,7 +211,7 @@ static void netdev_port_receive(struct vport
>>>>> *vport,
>>>>> struct sk_buff *skb)
>>>>>           skb_push(skb, ETH_HLEN);
>>>>>           ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
>>>>>
>>>>> -       ovs_vport_receive(vport, skb, NULL);
>>>>> +       ovs_vport_receive(vport, skb, NULL, false);
>>>>>           return;
>>>>>
>>>>>    error:
>>>>> diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
>>>>> index 8689853..ad67835 100644
>>>>> --- a/datapath/vport-vxlan.c
>>>>> +++ b/datapath/vport-vxlan.c
>>>>> @@ -72,7 +72,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct
>>>>> sk_buff *skb, __be32 vx_vni)
>>>>>                                  udp_hdr(skb)->source,
>>>>> udp_hdr(skb)->dest,
>>>>>                                  key, TUNNEL_KEY, NULL, 0);
>>>>>
>>>>> -       ovs_vport_receive(vport, skb, &tun_info);
>>>>> +       ovs_vport_receive(vport, skb, &tun_info, false);
>>>>>    }
>>>>>
>>>>>    static int vxlan_get_options(const struct vport *vport, struct
>>>>> sk_buff
>>>>> *skb)
>>>>> diff --git a/datapath/vport.c b/datapath/vport.c
>>>>> index 274e47f..7b588bd 100644
>>>>> --- a/datapath/vport.c
>>>>> +++ b/datapath/vport.c
>>>>> @@ -438,13 +438,15 @@ u32 ovs_vport_find_upcall_portid(const struct
>>>>> vport
>>>>> *vport, struct sk_buff *skb)
>>>>>     * @vport: vport that received the packet
>>>>>     * @skb: skb that was received
>>>>>     * @tun_info: tunnel (if any) that carried packet
>>>>> + * @is_layer3: packet is layer 3
>>>>>     *
>>>>>     * Must be called with rcu_read_lock.  The packet cannot be shared
>>>>> and
>>>>>     * skb->data should point to the Ethernet header.  The caller must
>>>>> have
>>>>> already
>>>>>     * called compute_ip_summed() to initialize the checksumming fields.
>>>>>     */
>>>>>    void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
>>>>> -                      const struct ovs_tunnel_info *tun_info)
>>>>> +                      const struct ovs_tunnel_info *tun_info,
>>>>> +                      bool is_layer3)
>>>>>    {
>>>>>           struct pcpu_sw_netstats *stats;
>>>>>           struct sw_flow_key key;
>>>>> @@ -459,7 +461,7 @@ void ovs_vport_receive(struct vport *vport, struct
>>>>> sk_buff *skb,
>>>>>           ovs_skb_init_inner_protocol(skb);
>>>>>           OVS_CB(skb)->input_vport = vport;
>>>>>           OVS_CB(skb)->egress_tun_info = NULL;
>>>>> -       error = ovs_flow_key_extract(tun_info, skb, &key);
>>>>> +       error = ovs_flow_key_extract(tun_info, skb, &key, is_layer3);
>>>>>           if (unlikely(error)) {
>>>>>                   kfree_skb(skb);
>>>>>                   return;
>>>>> diff --git a/datapath/vport.h b/datapath/vport.h
>>>>> index c0ab88a..aeebc9a 100644
>>>>> --- a/datapath/vport.h
>>>>> +++ b/datapath/vport.h
>>>>> @@ -218,7 +218,7 @@ static inline struct vport *vport_from_priv(void
>>>>> *priv)
>>>>>    }
>>>>>
>>>>>    void ovs_vport_receive(struct vport *, struct sk_buff *,
>>>>> -                      const struct ovs_tunnel_info *);
>>>>> +                      const struct ovs_tunnel_info *, bool is_layer3);
>>>>>
>>>>>    /* List of statically compiled vport implementations.  Don't forget
>>>>> to
>>>>> also
>>>>>     * add yours to the list at the top of vport.c.
>>>>> --
>>>>
>>>> Have you tried running GSO traffic over lisp using OVS compat GSO code
>>>> and upstream GSO code?
>>>
>>>
>>> I haven't and I'm not sure what's the right way to do that.  I do my
>>> testing
>>> between to VMs.  I see with ethtool that GSO is enabled on the virtual
>>> NICs
>>> in the VMs, and on the br0 interface after the switch is created.
>>>
>> This is fine.
>>
>>> To test that I can enable/disable GSO I download a 30MB file over HTTP
>>> and
>>> look in Wireshark for packets satisfying "ip.len > 1500".  With TSO
>>> enabled,
>>> I do get such packets.  Not with GSO though.
>>>
>>> Can you point me to the right way to test this?
>>
>> netperf test will to the trick, But you need to test with different
>> kernel versions.
>> - kernel < 3.10 where ovs configure could not find symbol
>> "gre_handle_offloads"
>> - kernel 3.17 which has all the offload used by OVS.
>
>
> I have one VM with Fedora 18 and kernel 3.7.2-201.fc18.x86_64, and the other
> with Fedora 19 and kernel 3.14.19-100.fc19.x86_64.  I tried netperf in both
> directions.  In both cases the OVS + LISP performance vs. direct link
> performace was one order of magnitude worse.  Is that to be expected?  Is
> this a good enough test for GSO traffic?  If yes, I will send out v7.
>
Can you give me numbers the you are seeing? One way to test GSO is to
look for any dropped packet at source, tcpdump can help with that. You
also need to set physical MTU large enough for encapsulated packet.



More information about the dev mailing list