[ovs-dev] [PATCH] datapath: Add basic MPLS support to kernel

ravi kerur rkerur at gmail.com
Wed Oct 3 21:31:36 UTC 2012


This patch looks very similar to the ones I had posted earlier albeit one
key difference being a variable added in ovs_skb_cb and obvious difference
in offset calculation code which is done based on this field. Some questions

1. Functions in this patch have similar issues which my initial patch had
for e.g. not updating checksum after push/pop.

2. In the long list of email conversation we had in July at least my
understanding was you did not want additional variables to be added to
ovs_skb_cb, is this still the case? if not, what's the advantage of this
approach v/s the approach taken in my patch?

3. Do you plan to use this as a base for packet recirculation logic?
because I do not see anything that relates to it. Does recirculation for
qinq, multiple mpls tags, mpls/gre and other multiple encaps be supported
by this logic?

If answer to Q2 is yes, then I feel this would be a bit regress since the
kernel patch I had posted earlier was reviewed at the very early stage by
Ben and later by Pravin. If answer to Q2 is "more fields can be added"
 then the relevant changes in this patch can be easily integrated.  just my
2 cents.

On Wed, Oct 3, 2012 at 12:17 PM, Jesse Gross <jesse at nicira.com> wrote:

> From: Leo Alterman <lalterman at nicira.com>
>
> Allow datapath to recognize and extract MPLS labels into flow keys
> and execute actions which push, pop, and set labels on packets.
>
> Signed-off-by: Leo Alterman <lalterman at nicira.com>
> --
> I haven't reviewed this but hopefully it can be used a good starting point.
> ---
>  datapath/actions.c          |   81
> +++++++++++++++++++++++++++++++++++++++++++
>  datapath/datapath.c         |   56 ++++++++++++++++++++++++++++++
>  datapath/datapath.h         |    8 +++++
>  datapath/flow.c             |   30 ++++++++++++++++
>  datapath/flow.h             |    7 ++++
>  datapath/vport.c            |    2 ++
>  include/linux/openvswitch.h |   32 +++++++++++++++++
>  7 files changed, 216 insertions(+)
>
> diff --git a/datapath/actions.c b/datapath/actions.c
> index 208f260..a199125 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -47,6 +47,67 @@ static int make_writable(struct sk_buff *skb, int
> write_len)
>         return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
>  }
>
> +static __be16 get_ethertype(const struct sk_buff *skb)
> +{
> +       struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_stack(skb) -
> ETH_HLEN);
> +       return hdr->h_proto;
> +}
> +
> +static void set_ethertype(struct sk_buff *skb, const __be16 ethertype)
> +{
> +       struct ethhdr *hdr = (struct ethhdr *)(skb_cb_mpls_stack(skb) -
> ETH_HLEN);
> +       hdr->h_proto = ethertype;
> +}
> +
> +static int push_mpls(struct sk_buff *skb, const struct
> ovs_action_push_mpls *mpls)
> +{
> +       u32 l2_size;
> +       __be32 *new_mpls_label;
> +
> +       if (skb_cow_head(skb, MPLS_HLEN) < 0) {
> +               kfree_skb(skb);
> +               return -ENOMEM;
> +       }
> +
> +       l2_size = skb_cb_mpls_stack_offset(skb);
> +       skb_push(skb, MPLS_HLEN);
> +       memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
> l2_size);
> +       skb_reset_mac_header(skb);
> +
> +       new_mpls_label = (__be32 *)(skb_mac_header(skb) + l2_size);
> +       *new_mpls_label = mpls->mpls_label;
> +
> +       set_ethertype(skb, mpls->mpls_ethertype);
> +       return 0;
> +}
> +
> +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
> +{
> +       __be16 current_ethertype = get_ethertype(skb);
> +       if (current_ethertype == htons(ETH_P_MPLS_UC) ||
> +               current_ethertype == htons(ETH_P_MPLS_MC)) {
> +               u32 l2_size = skb_cb_mpls_stack_offset(skb);
> +
> +               memmove(skb_mac_header(skb) + MPLS_HLEN,
> skb_mac_header(skb), l2_size);
> +
> +               skb_pull(skb, MPLS_HLEN);
> +               skb_reset_mac_header(skb);
> +
> +               set_ethertype(skb, *ethertype);
> +       }
> +       return 0;
> +}
> +
> +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_label)
> +{
> +       __be16 current_ethertype = get_ethertype(skb);
> +       if (current_ethertype == htons(ETH_P_MPLS_UC) ||
> +               current_ethertype == htons(ETH_P_MPLS_MC)) {
> +               memcpy(skb_cb_mpls_stack(skb), mpls_label, sizeof(__be32));
> +       }
> +       return 0;
> +}
> +
>  /* remove VLAN header from packet and update csum accrodingly. */
>  static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
>  {
> @@ -100,6 +161,9 @@ static int pop_vlan(struct sk_buff *skb)
>                 return err;
>
>         __vlan_hwaccel_put_tag(skb, ntohs(tci));
> +
> +       /* update pointer to MPLS label stack */
> +       skb_cb_set_mpls_stack(skb);
>         return 0;
>  }
>
> @@ -120,6 +184,9 @@ static int push_vlan(struct sk_buff *skb, const struct
> ovs_action_push_vlan *vla
>
>         }
>         __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) &
> ~VLAN_TAG_PRESENT);
> +
> +       /* update pointer to MPLS label stack */
> +       skb_cb_set_mpls_stack(skb);
>         return 0;
>  }
>
> @@ -396,6 +463,20 @@ static int do_execute_actions(struct datapath *dp,
> struct sk_buff *skb,
>                         output_userspace(dp, skb, a);
>                         break;
>
> +               case OVS_ACTION_ATTR_PUSH_MPLS:
> +                       err = push_mpls(skb, nla_data(a));
> +                       if (unlikely(err)) /* skb already freed. */
> +                               return err;
> +                       break;
> +
> +               case OVS_ACTION_ATTR_POP_MPLS:
> +                       err = pop_mpls(skb, nla_data(a));
> +                       break;
> +
> +               case OVS_ACTION_ATTR_SET_MPLS:
> +                       err = set_mpls(skb, nla_data(a));
> +                       break;
> +
>                 case OVS_ACTION_ATTR_PUSH_VLAN:
>                         err = push_vlan(skb, nla_data(a));
>                         if (unlikely(err)) /* skb already freed. */
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index c83ce16..91b70bb 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -74,6 +74,41 @@ int ovs_net_id __read_mostly;
>  int (*ovs_dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int
> cmd);
>  EXPORT_SYMBOL(ovs_dp_ioctl_hook);
>
> +void skb_cb_set_mpls_stack(struct sk_buff *skb)
> +{
> +       struct ethhdr *eth;
> +       int nh_ofs;
> +       __be16 dl_type = 0;
> +
> +       eth = eth_hdr(skb);
> +       nh_ofs = sizeof(struct ethhdr);
> +       if (likely(eth->h_proto >= htons(ETH_TYPE_MIN))) {
> +               dl_type = eth->h_proto;
> +
> +               while (dl_type == htons(ETH_P_8021Q) &&
> +                               skb->len >= nh_ofs + sizeof(struct
> vlan_hdr)) {
> +                       struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data
> + nh_ofs);
> +                       dl_type = vh->h_vlan_encapsulated_proto;
> +                       nh_ofs += sizeof(struct vlan_hdr);
> +               }
> +
> +               OVS_CB(skb)->mpls_stack = nh_ofs;
> +       } else {
> +               OVS_CB(skb)->mpls_stack = 0;
> +       }
> +}
> +
> +unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb)
> +{
> +       return OVS_CB(skb)->mpls_stack ?
> +                                       skb_mac_header(skb) +
> OVS_CB(skb)->mpls_stack : 0;
> +}
> +
> +ptrdiff_t skb_cb_mpls_stack_offset(const struct sk_buff *skb)
> +{
> +       return OVS_CB(skb)->mpls_stack;
> +}
> +
>  /**
>   * DOC: Locking:
>   *
> @@ -663,12 +698,17 @@ static int validate_actions(const struct nlattr
> *attr,
>                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
>                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
>                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
> +                       [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct
> ovs_action_push_mpls),
> +                       [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
> +                       [OVS_ACTION_ATTR_SET_MPLS] = sizeof(__be32),
>                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct
> ovs_action_push_vlan),
>                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
>                         [OVS_ACTION_ATTR_SET] = (u32)-1,
>                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
>                 };
>                 const struct ovs_action_push_vlan *vlan;
> +               __be16 mpls_ethertype;
> +               __be32 mpls_label;
>                 int type = nla_type(a);
>
>                 if (type > OVS_ACTION_ATTR_MAX ||
> @@ -691,6 +731,22 @@ static int validate_actions(const struct nlattr *attr,
>                                 return -EINVAL;
>                         break;
>
> +               case OVS_ACTION_ATTR_PUSH_MPLS:
> +                       mpls_ethertype = nla_get_be16(a);
> +                       if (mpls_ethertype != htons(ETH_P_MPLS_UC) &&
> +                               mpls_ethertype != htons(ETH_P_MPLS_MC))
> +                               return -EINVAL;
> +                       break;
> +
> +               case OVS_ACTION_ATTR_POP_MPLS:
> +                       break;
> +
> +               case OVS_ACTION_ATTR_SET_MPLS:
> +                       mpls_label = nla_get_be32(a);
> +                       if (mpls_label == htonl(0)) {
> +                               return -EINVAL;
> +                       }
> +                       break;
>
>                 case OVS_ACTION_ATTR_POP_VLAN:
>                         break;
> diff --git a/datapath/datapath.h b/datapath/datapath.h
> index affbf0e..1801ccd 100644
> --- a/datapath/datapath.h
> +++ b/datapath/datapath.h
> @@ -97,6 +97,9 @@ struct datapath {
>   * struct ovs_skb_cb - OVS data in skb CB
>   * @flow: The flow associated with this packet.  May be %NULL if no flow.
>   * @tun_id: ID of the tunnel that encapsulated this packet.  It is 0 if
> the
> + * packet is not being tunneled.
> + * @mpls_stack: Offset of the packet's MPLS stack from the beginning of
> the
> + * ethernet frame.  It is 0 if no MPLS stack is present.
>   * @ip_summed: Consistently stores L4 checksumming status across different
>   * kernel versions.
>   * @csum_start: Stores the offset from which to start checksumming
> independent
> @@ -108,6 +111,7 @@ struct datapath {
>  struct ovs_skb_cb {
>         struct sw_flow          *flow;
>         __be64                  tun_id;
> +       ptrdiff_t       mpls_stack;
>  #ifdef NEED_CSUM_NORMALIZE
>         enum csum_type          ip_summed;
>         u16                     csum_start;
> @@ -192,4 +196,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport
> *, u32 pid, u32 seq,
>                                          u8 cmd);
>
>  int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
> +
> +void skb_cb_set_mpls_stack(struct sk_buff *skb);
> +unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb);
> +ptrdiff_t skb_cb_mpls_stack_offset(const struct sk_buff *skb);
>  #endif /* datapath.h */
> diff --git a/datapath/flow.c b/datapath/flow.c
> index d07337c..40df8ff 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -739,6 +739,14 @@ int ovs_flow_extract(struct sk_buff *skb, u16
> in_port, struct sw_flow_key *key,
>                                 key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
>                         }
>                 }
> +       } else if (key->eth.type == htons(ETH_P_MPLS_UC) ||
> +                               key->eth.type == htons(ETH_P_MPLS_MC)) {
> +               error = check_header(skb, MPLS_HLEN);
> +               if (unlikely(error))
> +                       goto out;
> +
> +               key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
> +               memcpy(&key->mpls.top_label, skb_network_header(skb),
> MPLS_HLEN);
>         } else if (key->eth.type == htons(ETH_P_IPV6)) {
>                 int nh_len;             /* IPv6 Header + Extensions */
>
> @@ -836,6 +844,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
>         [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
>         [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
>         [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
> +       [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
>         [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
>         [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
>         [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
> @@ -1141,6 +1150,17 @@ int ovs_flow_from_nlattrs(struct sw_flow_key
> *swkey, int *key_lenp,
>                 swkey->ip.proto = ntohs(arp_key->arp_op);
>                 memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
>                 memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
> +       } else if (swkey->eth.type == htons(ETH_P_MPLS_UC) ||
> +                               swkey->eth.type == htons(ETH_P_MPLS_MC)) {
> +               const struct ovs_key_mpls *mpls_key;
> +
> +               if (!(attrs & (1 << OVS_KEY_ATTR_MPLS)))
> +                       return -EINVAL;
> +               attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> +
> +               key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
> +               mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> +               swkey->mpls.top_label = mpls_key->mpls_top_label;
>         }
>
>         if (attrs)
> @@ -1284,6 +1304,16 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key
> *swkey, struct sk_buff *skb)
>                 arp_key->arp_op = htons(swkey->ip.proto);
>                 memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
>                 memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
> +       } else if (swkey->eth.type == htons(ETH_P_MPLS_UC) ||
> +                               swkey->eth.type == htons(ETH_P_MPLS_MC)) {
> +               struct ovs_key_mpls *mpls_key;
> +
> +               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> sizeof(*mpls_key));
> +               if (!nla)
> +                       goto nla_put_failure;
> +               mpls_key = nla_data(nla);
> +               memset(mpls_key, 0, sizeof(struct ovs_key_mpls));
> +               mpls_key->mpls_top_label = swkey->mpls.top_label;
>         }
>
>         if ((swkey->eth.type == htons(ETH_P_IP) ||
> diff --git a/datapath/flow.h b/datapath/flow.h
> index 5be481e..c18183b 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -53,6 +53,9 @@ struct sw_flow_key {
>                 __be16 type;            /* Ethernet frame type. */
>         } eth;
>         struct {
> +               __be32 top_label;       /* 0 if no MPLS, top label from
> stack otherwise */
> +       } mpls;
> +       struct {
>                 u8     proto;           /* IP protocol or lower 8 bits of
> ARP opcode. */
>                 u8     tos;             /* IP ToS. */
>                 u8     ttl;             /* IP TTL/hop limit. */
> @@ -126,6 +129,10 @@ struct arp_eth_header {
>         unsigned char       ar_tip[4];          /* target IP address
>  */
>  } __packed;
>
> +#define ETH_TYPE_MIN 0x600
> +
> +#define MPLS_HLEN 4
> +
>  int ovs_flow_init(void);
>  void ovs_flow_exit(void);
>
> diff --git a/datapath/vport.c b/datapath/vport.c
> index 172261a..0664e4c 100644
> --- a/datapath/vport.c
> +++ b/datapath/vport.c
> @@ -464,6 +464,8 @@ void ovs_vport_receive(struct vport *vport, struct
> sk_buff *skb)
>         if (!(vport->ops->flags & VPORT_F_TUN_ID))
>                 OVS_CB(skb)->tun_id = 0;
>
> +       skb_cb_set_mpls_stack(skb);
> +
>         ovs_dp_process_received_packet(vport, skb);
>  }
>
> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index f5c9cca..78960d1 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> @@ -278,6 +278,7 @@ enum ovs_key_attr {
>         OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
>         OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
>         OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
> +       OVS_KEY_ATTR_MPLS,      /* struct ovs_key_mpls */
>         OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */
>         __OVS_KEY_ATTR_MAX
>  };
> @@ -307,6 +308,10 @@ struct ovs_key_ethernet {
>         __u8     eth_dst[6];
>  };
>
> +struct ovs_key_mpls {
> +       __be32 mpls_top_label;
> +};
> +
>  struct ovs_key_ipv4 {
>         __be32 ipv4_src;
>         __be32 ipv4_dst;
> @@ -437,6 +442,20 @@ enum ovs_userspace_attr {
>  #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
>
>  /**
> + * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action
> argument.
> + * @mpls_label: MPLS label to push.
> + * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
> + *
> + * The only values @mpls_ethertype should ever be given are
> %ETH_P_MPLS_UC and
> + * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Any other values
> would
> + * produce a corrupt packet.
> + */
> +struct ovs_action_push_mpls {
> +       __be32 mpls_label;
> +       __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC
> */
> +};
> +
> +/**
>   * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action
> argument.
>   * @vlan_tpid: Tag protocol identifier (TPID) to push.
>   * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be
> set
> @@ -461,6 +480,16 @@ struct ovs_action_push_vlan {
>   * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
>   * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify
> and its
>   * value.
> + * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label onto the top of the
> packet
> + * MPLS stack. Set the ethertype of the encapsulating frame to either
> + * %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to indicate the new packet contents.
> + * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label off of the packet's MPLS
> stack.
> + * Set the encapsulating frame's ethertype to indicate the new packet
> contents
> + * (this could potentially still be %ETH_P_MPLS_* if there are remaining
> MPLS
> + * labels).  If there are no MPLS labels as determined by ethertype, no
> action
> + * is taken.
> + * @OVS_ACTION_ATTR_SET_MPLS: Set the value of the top-most label in the
> MPLS
> + * label stack.  If there are no MPLS labels in the packet, no action is
> taken.
>   * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
>   * packet.
>   * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the
> packet.
> @@ -477,6 +506,9 @@ enum ovs_action_attr {
>         OVS_ACTION_ATTR_OUTPUT,       /* u32 port number. */
>         OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
>         OVS_ACTION_ATTR_SET,          /* One nested OVS_KEY_ATTR_*. */
> +       OVS_ACTION_ATTR_PUSH_MPLS,    /* struct ovs_action_push_mpls. */
> +       OVS_ACTION_ATTR_POP_MPLS,     /* __be16 ethertype. */
> +       OVS_ACTION_ATTR_SET_MPLS,     /* __be32 MPLS label */
>         OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
>         OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
>         OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
> --
> 1.7.9.5
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20121003/1fe4457b/attachment-0003.html>


More information about the dev mailing list