[ovs-dev] [PATCH] v2 datapath: More flexible kernel/userspace tunneling attribute.

Jesse Gross jesse at nicira.com
Thu Jan 17 02:41:10 UTC 2013


On Wed, Jan 16, 2013 at 1:54 PM, Pravin B Shelar <pshelar at nicira.com> wrote:
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index ed69af8..4ed40e2 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
> +{
> +
> +       struct sw_flow_actions *new;
> +       struct nlattr *a;
> +
> +       if (NLA_ALIGN(attr_len) <= (ksize(*sfa) - (*sfa)->actions_len))
> +               goto out;
> +
> +       if (ksize(*sfa) * 2 > MAX_ACTIONS_BUFSIZE)
> +               return ERR_PTR(-EMSGSIZE);

It's possible that the current size is smaller than
MAX_ACTIONS_BUFSIZE but 2 * size is larger.  This probably is not
likely because kmalloc will round up to a power of two and
MAX_ACTIONS_BUFSIZE is a power of two but I'm not sure that we want to
implicitly assume that.

> @@ -716,16 +850,15 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
>         err = PTR_ERR(acts);
>         if (IS_ERR(acts))
>                 goto err_flow_free;
> +
> +       err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
>         rcu_assign_pointer(flow->sf_acts, acts);
> +       if (err)
> +               goto err_flow_free;

I would probably put the error handler before continuing on with the
rcu_assign_pointer call.

> +static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
> +{
> +       const struct nlattr *a;
> +       int rem, err;
> +
> +       nla_for_each_attr(a, attr, len, rem) {
> +               bool skip_copy;
> +               int type = nla_type(a);
> +
> +               skip_copy = false;
> +               switch (type) {
> +               case OVS_ACTION_ATTR_SET:
> +                       err = set_tun_action_to_attr(a, skb, &skip_copy);

The name is a little strange given that we call it unconditionally for
all set actions.

> @@ -951,28 +1179,32 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
>
>         /* Validate actions. */
>         if (a[OVS_FLOW_ATTR_ACTIONS]) {
> -               error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0);
> -               if (error)
> +               acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
> +               error = PTR_ERR(acts);
> +               if (IS_ERR(acts))
>                         goto error;
> +
> +               error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0, &acts);
> +               if (error) {
> +                       goto err_kfree;
> +               }

We don't need the braces around this error condition.

>         } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
>                 error = -EINVAL;
> -               goto error;
> +               goto err_kfree;

I don't we need to call err_kfree in this case because we didn't
actually allocate anything.

> diff --git a/datapath/flow.c b/datapath/flow.c
> index 63eef77..49982f0 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> +int ipv4_tun_from_nlattr(const struct nlattr *attr,
> +                        struct ovs_key_ipv4_tunnel *tun_key)
> +{
> +       struct nlattr *a;
> +       int rem;
> +
> +       memset(tun_key, 0, sizeof(*tun_key));
> +
> +       nla_for_each_nested(a, attr, rem) {
> +               int type = nla_type(a);
> +               static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_MAX + 1] = {
> +                       [OVS_TUNNEL_ID] = sizeof(u64),
> +                       [OVS_TUNNEL_IPV4_SRC] = sizeof(u32),
> +                       [OVS_TUNNEL_IPV4_DST] = sizeof(u32),
> +                       [OVS_TUNNEL_TOS] = 1,
> +                       [OVS_TUNNEL_TTL] = 1,
> +                       [OVS_TUNNEL_FLAGS_DONT_FRAGMENT] = 0,
> +                       [OVS_TUNNEL_FLAGS_CSUM] = 0,
> +               };
> +
> +               if (type > OVS_TUNNEL_MAX ||
> +                       ovs_tunnel_key_lens[type] != nla_len(a))
> +                       return -EINVAL;
> +
> +               switch (type) {
> +               case OVS_TUNNEL_ID:
> +                       memcpy(&tun_key->tun_id, nla_data(a), sizeof(__be64));
> +                       tun_key->tun_flags |= OVS_TNL_F_KEY;
> +               break;
> +               case OVS_TUNNEL_IPV4_SRC:
> +                       memcpy(&tun_key->ipv4_src, nla_data(a), sizeof(__be32));
> +               break;
> +               case OVS_TUNNEL_IPV4_DST:
> +                       memcpy(&tun_key->ipv4_dst, nla_data(a), sizeof(__be32));

Can't we use nla_get_X for these types?

> +       if (rem > 0)
> +               return -EINVAL;
> +
> +       if (!tun_key->ipv4_dst)
> +               return -EINVAL;
> +
> +       if (!tun_key->ipv4_ttl)
> +               return -EINVAL;

I would distinguish between TTL of zero and not set.  If TTL is zero
is explicitly asked for then I think it's fine to allow but we might
want to create a different default later.

> +int ipv4_tun_to_nlattr(struct sk_buff *skb,
> +                       const struct ovs_key_ipv4_tunnel *tun_key)
> +{
> +       struct nlattr *nla;
> +
> +       nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
> +       if (!nla)
> +               return -EMSGSIZE;
> +
> +       if (tun_key->tun_flags & OVS_TNL_F_KEY &&
> +           nla_put_be64(skb, OVS_TUNNEL_ID, tun_key->tun_id))
> +               return -EMSGSIZE;
> +       if (tun_key->ipv4_src &&
> +           nla_put_be32(skb, OVS_TUNNEL_IPV4_SRC, tun_key->ipv4_src))
> +               return -EMSGSIZE;
> +       if (nla_put_be32(skb, OVS_TUNNEL_IPV4_DST, tun_key->ipv4_dst))
> +               return -EMSGSIZE;
> +       if (tun_key->ipv4_tos &&
> +           nla_put_u8(skb, OVS_TUNNEL_TOS, tun_key->ipv4_tos))
> +               return -EMSGSIZE;
> +       if (tun_key->ipv4_ttl &&
> +           nla_put_u8(skb, OVS_TUNNEL_TTL, tun_key->ipv4_ttl))
> +               return -EMSGSIZE;

I think we should always include TTL in our messages since we are
saying that it is required now.

> diff --git a/datapath/flow.h b/datapath/flow.h
> index 3f3624f..4b43336 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> +struct ovs_key_ipv4_tunnel {
> +       __be64 tun_id;
> +       __u32  tun_flags;
> +       __be32 ipv4_src;
> +       __be32 ipv4_dst;
> +       __u8   ipv4_tos;
> +       __u8   ipv4_ttl;
> +       __u8   pad[2];
> +};

Is there a need to still keep the pad around?  We could also reduce
tun_flags to a u16 (or even a u8 really).  On 32-bit machines these
two things would reduce the size of the struct.

Also, you could use the non __ type definitions since this is internal
to the kernel.

> diff --git a/datapath/tunnel.h b/datapath/tunnel.h
> index 7705475..809fefd 100644
> --- a/datapath/tunnel.h
> +++ b/datapath/tunnel.h
> @@ -59,6 +59,11 @@
>                       TNL_F_DF_INHERIT | TNL_F_DF_DEFAULT | TNL_F_PMTUD | \
>                       TNL_F_IPSEC)
>
> +/* Tunnel flow flags. */
> +#define OVS_TNL_F_DONT_FRAGMENT                (1 << 0)
> +#define OVS_TNL_F_CSUM                 (1 << 1)
> +#define OVS_TNL_F_KEY                  (1 << 2)

I would probably define these in flow.h with the struct
ovs_key_ipv4_tunnel definition since them seem closely related.

> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index 5e32965..9b4e257 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> +enum ovs_tunnel_attr {
> +       OVS_TUNNEL_ID,          /* be64 Tunnel ID */
> +       OVS_TUNNEL_IPV4_SRC,    /* be32 Tunnel src IP address. */
> +       OVS_TUNNEL_IPV4_DST,    /* be32 Tunnel dst IP address. */
> +       OVS_TUNNEL_TOS,         /* u8 Tunnel IP ToS. */
> +       OVS_TUNNEL_TTL,         /* u8 Tunnel IP TTL. */

I would include ATTR in these names (as in OVS_TUNNEL_ATTR_ID) to
match the other types.

> +       OVS_TUNNEL_FLAGS_DONT_FRAGMENT, /* No argument, flag to set DF. */
> +       OVS_TUNNEL_FLAGS_CSUM,  /* No argument. flag to CSUM packet. */

We probably could drop FLAGS_ from these names to make them a little shorter.

> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index e2f21da..5d7f25a 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> +/* Returns OVS_TNL_* flags. */
> +static enum odp_key_fitness
> +tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)

The comment above this function doesn't look right.

> +{
> +    unsigned int left;
> +    const struct nlattr *a;
> +
> +    NL_NESTED_FOR_EACH(a, left, attr) {
> +        uint16_t type = nl_attr_type(a);
> +        size_t len = nl_attr_get_size(a);
> +        int expected_len = tunnel_key_attr_len(type);
> +
> +        if (len != expected_len && expected_len >= 0) {
> +            return ODP_FIT_ERROR;
> +        }
> +
> +        switch (type) {
> +        case OVS_TUNNEL_ID:
> +            tun->tun_id = nl_attr_get_be64(a);
> +            tun->flags |= FLOW_TNL_F_KEY;
> +        break;
> +        case OVS_TUNNEL_IPV4_SRC:
> +            tun->ip_src = nl_attr_get_be32(a);
> +        break;
> +        case OVS_TUNNEL_IPV4_DST:
> +            tun->ip_dst = nl_attr_get_be32(a);
> +        break;
> +        case OVS_TUNNEL_TOS:
> +            tun->ip_tos = nl_attr_get_u8(a);
> +        break;
> +        case OVS_TUNNEL_TTL:
> +            tun->ip_ttl = nl_attr_get_u8(a);

Should we enforce that TTL is present?

> +        break;
> +        case OVS_TUNNEL_FLAGS_DONT_FRAGMENT:
> +            tun->flags |= FLOW_TNL_F_DONT_FRAGMENT;
> +        break;
> +        case OVS_TUNNEL_FLAGS_CSUM:
> +            tun->flags |= FLOW_TNL_F_CSUM;
> +        break;
> +        default:
> +            return ODP_FIT_TOO_MUCH;

If we get an unknown attribute we should still extract the parts that
we understand since we'll still process the flow.

> +static int
> +tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key)
> +{
> +    size_t tun_key_ofs;
> +
> +    tun_key_ofs = nl_msg_start_nested(a, OVS_KEY_ATTR_TUNNEL);
> +
> +    if (tun_key->flags & FLOW_TNL_F_KEY) {
> +        nl_msg_put_be64(a, OVS_TUNNEL_ID, tun_key->tun_id);
> +    }
> +    if (tun_key->ip_src) {
> +        nl_msg_put_be32(a, OVS_TUNNEL_IPV4_SRC, tun_key->ip_src);
> +    }
> +    if (tun_key->ip_dst) {
> +        nl_msg_put_be32(a, OVS_TUNNEL_IPV4_DST, tun_key->ip_dst);
> +    }
> +    if (tun_key->ip_tos) {
> +        nl_msg_put_u8(a, OVS_TUNNEL_TOS, tun_key->ip_tos);
> +    }
> +    if (tun_key->ip_ttl) {
> +        nl_msg_put_u8(a, OVS_TUNNEL_TTL, tun_key->ip_ttl);
> +    } else {
> +        return -EINVAL;

I'm not sure that we need to return an error here.  If the tunnel code
really wants to use a TTL of zero then we should let it for the time
being.  We should just always put the attribute.

> @@ -1905,23 +1962,17 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
>          expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUN_ID;
>      }
>
> -    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4_TUNNEL)) {
> -        const struct ovs_key_ipv4_tunnel *ipv4_tun_key;
> -
> -        ipv4_tun_key = nl_attr_get(attrs[OVS_KEY_ATTR_IPV4_TUNNEL]);
> -
> -        flow->tunnel.tun_id = ipv4_tun_key->tun_id;
> -        flow->tunnel.ip_src = ipv4_tun_key->ipv4_src;
> -        flow->tunnel.ip_dst = ipv4_tun_key->ipv4_dst;
> -        flow->tunnel.flags = odp_to_flow_flags(ipv4_tun_key->tun_flags);
> -        flow->tunnel.ip_tos = ipv4_tun_key->ipv4_tos;
> -        flow->tunnel.ip_ttl = ipv4_tun_key->ipv4_ttl;
> +    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) {
> +        enum odp_key_fitness res;
>
> +        res = tun_key_from_attr(attrs[OVS_KEY_ATTR_TUNNEL], &flow->tunnel);
> +        if (res == ODP_FIT_ERROR) {
> +            return ODP_FIT_ERROR;
> +        } else if (res == ODP_FIT_PERFECT) {
>          /* Allow this to show up as unexpected, if there are unknown flags,
>           * eventually resulting in ODP_FIT_TOO_MUCH.
>           * OVS_TNL_F_KNOWN_MASK defined locally above. */
> -        if (!(ipv4_tun_key->tun_flags & ~OVS_TNL_F_KNOWN_MASK)) {
> -            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IPV4_TUNNEL;
> +            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUNNEL;
>          }

I think comment above this if no longer applies.

Can you also make sure to test this thoroughly since it's so late in
the release cycle?



More information about the dev mailing list