[ovs-dev] [PATCH] v2 datapath: More flexible kernel/userspace tunneling attribute.

Pravin Shelar pshelar at nicira.com
Thu Jan 17 20:51:25 UTC 2013


On Wed, Jan 16, 2013 at 6:41 PM, Jesse Gross <jesse at nicira.com> wrote:
> On Wed, Jan 16, 2013 at 1:54 PM, Pravin B Shelar <pshelar at nicira.com> wrote:
>> diff --git a/datapath/datapath.c b/datapath/datapath.c
>> index ed69af8..4ed40e2 100644
>> --- a/datapath/datapath.c
>> +++ b/datapath/datapath.c
>> +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
>> +{
>> +
>> +       struct sw_flow_actions *new;
>> +       struct nlattr *a;
>> +
>> +       if (NLA_ALIGN(attr_len) <= (ksize(*sfa) - (*sfa)->actions_len))
>> +               goto out;
>> +
>> +       if (ksize(*sfa) * 2 > MAX_ACTIONS_BUFSIZE)
>> +               return ERR_PTR(-EMSGSIZE);
>
> It's possible that the current size is smaller than
> MAX_ACTIONS_BUFSIZE but 2 * size is larger.  This probably is not
> likely because kmalloc will round up to a power of two and
> MAX_ACTIONS_BUFSIZE is a power of two but I'm not sure that we want to
> implicitly assume that.
>
right , so max allocation will of size MAX_ACTIONS_BUFSIZE. should we
increment size more slowly?

>> @@ -716,16 +850,15 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
>>         err = PTR_ERR(acts);
>>         if (IS_ERR(acts))
>>                 goto err_flow_free;
>> +
>> +       err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
>>         rcu_assign_pointer(flow->sf_acts, acts);
>> +       if (err)
>> +               goto err_flow_free;
>
> I would probably put the error handler before continuing on with the
> rcu_assign_pointer call.
>
I am using ovs_flow_free() to free acts, so I need to assign it,
before error check.

>> +static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
>> +{
>> +       const struct nlattr *a;
>> +       int rem, err;
>> +
>> +       nla_for_each_attr(a, attr, len, rem) {
>> +               bool skip_copy;
>> +               int type = nla_type(a);
>> +
>> +               skip_copy = false;
>> +               switch (type) {
>> +               case OVS_ACTION_ATTR_SET:
>> +                       err = set_tun_action_to_attr(a, skb, &skip_copy);
>
> The name is a little strange given that we call it unconditionally for
> all set actions.
>
ok, I rearranged code.

>> @@ -951,28 +1179,32 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
>>
>>         /* Validate actions. */
>>         if (a[OVS_FLOW_ATTR_ACTIONS]) {
>> -               error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0);
>> -               if (error)
>> +               acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
>> +               error = PTR_ERR(acts);
>> +               if (IS_ERR(acts))
>>                         goto error;
>> +
>> +               error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0, &acts);
>> +               if (error) {
>> +                       goto err_kfree;
>> +               }
>
> We don't need the braces around this error condition.
>
ok.

>>         } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
>>                 error = -EINVAL;
>> -               goto error;
>> +               goto err_kfree;
>
> I don't we need to call err_kfree in this case because we didn't
> actually allocate anything.
>
right.

>> diff --git a/datapath/flow.c b/datapath/flow.c
>> index 63eef77..49982f0 100644
>> --- a/datapath/flow.c
>> +++ b/datapath/flow.c
>> +int ipv4_tun_from_nlattr(const struct nlattr *attr,
>> +                        struct ovs_key_ipv4_tunnel *tun_key)
>> +{
>> +       struct nlattr *a;
>> +       int rem;
>> +
>> +       memset(tun_key, 0, sizeof(*tun_key));
>> +
>> +       nla_for_each_nested(a, attr, rem) {
>> +               int type = nla_type(a);
>> +               static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_MAX + 1] = {
>> +                       [OVS_TUNNEL_ID] = sizeof(u64),
>> +                       [OVS_TUNNEL_IPV4_SRC] = sizeof(u32),
>> +                       [OVS_TUNNEL_IPV4_DST] = sizeof(u32),
>> +                       [OVS_TUNNEL_TOS] = 1,
>> +                       [OVS_TUNNEL_TTL] = 1,
>> +                       [OVS_TUNNEL_FLAGS_DONT_FRAGMENT] = 0,
>> +                       [OVS_TUNNEL_FLAGS_CSUM] = 0,
>> +               };
>> +
>> +               if (type > OVS_TUNNEL_MAX ||
>> +                       ovs_tunnel_key_lens[type] != nla_len(a))
>> +                       return -EINVAL;
>> +
>> +               switch (type) {
>> +               case OVS_TUNNEL_ID:
>> +                       memcpy(&tun_key->tun_id, nla_data(a), sizeof(__be64));
>> +                       tun_key->tun_flags |= OVS_TNL_F_KEY;
>> +               break;
>> +               case OVS_TUNNEL_IPV4_SRC:
>> +                       memcpy(&tun_key->ipv4_src, nla_data(a), sizeof(__be32));
>> +               break;
>> +               case OVS_TUNNEL_IPV4_DST:
>> +                       memcpy(&tun_key->ipv4_dst, nla_data(a), sizeof(__be32));
>
> Can't we use nla_get_X for these types?
>
>> +       if (rem > 0)
>> +               return -EINVAL;
>> +
>> +       if (!tun_key->ipv4_dst)
>> +               return -EINVAL;
>> +
>> +       if (!tun_key->ipv4_ttl)
>> +               return -EINVAL;
>
> I would distinguish between TTL of zero and not set.  If TTL is zero
> is explicitly asked for then I think it's fine to allow but we might
> want to create a different default later.
>
ok.

>> +int ipv4_tun_to_nlattr(struct sk_buff *skb,
>> +                       const struct ovs_key_ipv4_tunnel *tun_key)
>> +{
>> +       struct nlattr *nla;
>> +
>> +       nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
>> +       if (!nla)
>> +               return -EMSGSIZE;
>> +
>> +       if (tun_key->tun_flags & OVS_TNL_F_KEY &&
>> +           nla_put_be64(skb, OVS_TUNNEL_ID, tun_key->tun_id))
>> +               return -EMSGSIZE;
>> +       if (tun_key->ipv4_src &&
>> +           nla_put_be32(skb, OVS_TUNNEL_IPV4_SRC, tun_key->ipv4_src))
>> +               return -EMSGSIZE;
>> +       if (nla_put_be32(skb, OVS_TUNNEL_IPV4_DST, tun_key->ipv4_dst))
>> +               return -EMSGSIZE;
>> +       if (tun_key->ipv4_tos &&
>> +           nla_put_u8(skb, OVS_TUNNEL_TOS, tun_key->ipv4_tos))
>> +               return -EMSGSIZE;
>> +       if (tun_key->ipv4_ttl &&
>> +           nla_put_u8(skb, OVS_TUNNEL_TTL, tun_key->ipv4_ttl))
>> +               return -EMSGSIZE;
>
> I think we should always include TTL in our messages since we are
> saying that it is required now.
>
ok.

>> diff --git a/datapath/flow.h b/datapath/flow.h
>> index 3f3624f..4b43336 100644
>> --- a/datapath/flow.h
>> +++ b/datapath/flow.h
>> +struct ovs_key_ipv4_tunnel {
>> +       __be64 tun_id;
>> +       __u32  tun_flags;
>> +       __be32 ipv4_src;
>> +       __be32 ipv4_dst;
>> +       __u8   ipv4_tos;
>> +       __u8   ipv4_ttl;
>> +       __u8   pad[2];
>> +};
>
> Is there a need to still keep the pad around?  We could also reduce
> tun_flags to a u16 (or even a u8 really).  On 32-bit machines these
> two things would reduce the size of the struct.
>
I just wanted keep padding explicit, so that we can use it latter in sw_flow
struct, I guess it not very useful I will remove it.

> Also, you could use the non __ type definitions since this is internal
> to the kernel.
>
ok.

>> diff --git a/datapath/tunnel.h b/datapath/tunnel.h
>> index 7705475..809fefd 100644
>> --- a/datapath/tunnel.h
>> +++ b/datapath/tunnel.h
>> @@ -59,6 +59,11 @@
>>                       TNL_F_DF_INHERIT | TNL_F_DF_DEFAULT | TNL_F_PMTUD | \
>>                       TNL_F_IPSEC)
>>
>> +/* Tunnel flow flags. */
>> +#define OVS_TNL_F_DONT_FRAGMENT                (1 << 0)
>> +#define OVS_TNL_F_CSUM                 (1 << 1)
>> +#define OVS_TNL_F_KEY                  (1 << 2)
>
> I would probably define these in flow.h with the struct
> ovs_key_ipv4_tunnel definition since them seem closely related.
>
ok.

>> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
>> index 5e32965..9b4e257 100644
>> --- a/include/linux/openvswitch.h
>> +++ b/include/linux/openvswitch.h
>> +enum ovs_tunnel_attr {
>> +       OVS_TUNNEL_ID,          /* be64 Tunnel ID */
>> +       OVS_TUNNEL_IPV4_SRC,    /* be32 Tunnel src IP address. */
>> +       OVS_TUNNEL_IPV4_DST,    /* be32 Tunnel dst IP address. */
>> +       OVS_TUNNEL_TOS,         /* u8 Tunnel IP ToS. */
>> +       OVS_TUNNEL_TTL,         /* u8 Tunnel IP TTL. */
>
> I would include ATTR in these names (as in OVS_TUNNEL_ATTR_ID) to
> match the other types.
>
>> +       OVS_TUNNEL_FLAGS_DONT_FRAGMENT, /* No argument, flag to set DF. */
>> +       OVS_TUNNEL_FLAGS_CSUM,  /* No argument. flag to CSUM packet. */
>
> We probably could drop FLAGS_ from these names to make them a little shorter.
>
ok.

>> diff --git a/lib/odp-util.c b/lib/odp-util.c
>> index e2f21da..5d7f25a 100644
>> --- a/lib/odp-util.c
>> +++ b/lib/odp-util.c
>> +/* Returns OVS_TNL_* flags. */
>> +static enum odp_key_fitness
>> +tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
>
> The comment above this function doesn't look right.
>
>> +{
>> +    unsigned int left;
>> +    const struct nlattr *a;
>> +
>> +    NL_NESTED_FOR_EACH(a, left, attr) {
>> +        uint16_t type = nl_attr_type(a);
>> +        size_t len = nl_attr_get_size(a);
>> +        int expected_len = tunnel_key_attr_len(type);
>> +
>> +        if (len != expected_len && expected_len >= 0) {
>> +            return ODP_FIT_ERROR;
>> +        }
>> +
>> +        switch (type) {
>> +        case OVS_TUNNEL_ID:
>> +            tun->tun_id = nl_attr_get_be64(a);
>> +            tun->flags |= FLOW_TNL_F_KEY;
>> +        break;
>> +        case OVS_TUNNEL_IPV4_SRC:
>> +            tun->ip_src = nl_attr_get_be32(a);
>> +        break;
>> +        case OVS_TUNNEL_IPV4_DST:
>> +            tun->ip_dst = nl_attr_get_be32(a);
>> +        break;
>> +        case OVS_TUNNEL_TOS:
>> +            tun->ip_tos = nl_attr_get_u8(a);
>> +        break;
>> +        case OVS_TUNNEL_TTL:
>> +            tun->ip_ttl = nl_attr_get_u8(a);
>
> Should we enforce that TTL is present?
>
ok, I will add check.

>> +        break;
>> +        case OVS_TUNNEL_FLAGS_DONT_FRAGMENT:
>> +            tun->flags |= FLOW_TNL_F_DONT_FRAGMENT;
>> +        break;
>> +        case OVS_TUNNEL_FLAGS_CSUM:
>> +            tun->flags |= FLOW_TNL_F_CSUM;
>> +        break;
>> +        default:
>> +            return ODP_FIT_TOO_MUCH;
>
> If we get an unknown attribute we should still extract the parts that
> we understand since we'll still process the flow.
>
>> +static int
>> +tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key)
>> +{
>> +    size_t tun_key_ofs;
>> +
>> +    tun_key_ofs = nl_msg_start_nested(a, OVS_KEY_ATTR_TUNNEL);
>> +
>> +    if (tun_key->flags & FLOW_TNL_F_KEY) {
>> +        nl_msg_put_be64(a, OVS_TUNNEL_ID, tun_key->tun_id);
>> +    }
>> +    if (tun_key->ip_src) {
>> +        nl_msg_put_be32(a, OVS_TUNNEL_IPV4_SRC, tun_key->ip_src);
>> +    }
>> +    if (tun_key->ip_dst) {
>> +        nl_msg_put_be32(a, OVS_TUNNEL_IPV4_DST, tun_key->ip_dst);
>> +    }
>> +    if (tun_key->ip_tos) {
>> +        nl_msg_put_u8(a, OVS_TUNNEL_TOS, tun_key->ip_tos);
>> +    }
>> +    if (tun_key->ip_ttl) {
>> +        nl_msg_put_u8(a, OVS_TUNNEL_TTL, tun_key->ip_ttl);
>> +    } else {
>> +        return -EINVAL;
>
> I'm not sure that we need to return an error here.  If the tunnel code
> really wants to use a TTL of zero then we should let it for the time
> being.  We should just always put the attribute.
>
ok.

>> @@ -1905,23 +1962,17 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
>>          expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUN_ID;
>>      }
>>
>> -    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4_TUNNEL)) {
>> -        const struct ovs_key_ipv4_tunnel *ipv4_tun_key;
>> -
>> -        ipv4_tun_key = nl_attr_get(attrs[OVS_KEY_ATTR_IPV4_TUNNEL]);
>> -
>> -        flow->tunnel.tun_id = ipv4_tun_key->tun_id;
>> -        flow->tunnel.ip_src = ipv4_tun_key->ipv4_src;
>> -        flow->tunnel.ip_dst = ipv4_tun_key->ipv4_dst;
>> -        flow->tunnel.flags = odp_to_flow_flags(ipv4_tun_key->tun_flags);
>> -        flow->tunnel.ip_tos = ipv4_tun_key->ipv4_tos;
>> -        flow->tunnel.ip_ttl = ipv4_tun_key->ipv4_ttl;
>> +    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) {
>> +        enum odp_key_fitness res;
>>
>> +        res = tun_key_from_attr(attrs[OVS_KEY_ATTR_TUNNEL], &flow->tunnel);
>> +        if (res == ODP_FIT_ERROR) {
>> +            return ODP_FIT_ERROR;
>> +        } else if (res == ODP_FIT_PERFECT) {
>>          /* Allow this to show up as unexpected, if there are unknown flags,
>>           * eventually resulting in ODP_FIT_TOO_MUCH.
>>           * OVS_TNL_F_KNOWN_MASK defined locally above. */
>> -        if (!(ipv4_tun_key->tun_flags & ~OVS_TNL_F_KNOWN_MASK)) {
>> -            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IPV4_TUNNEL;
>> +            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUNNEL;
>>          }
>
> I think comment above this if no longer applies.
>
> Can you also make sure to test this thoroughly since it's so late in
> the release cycle?

ok, will do.



More information about the dev mailing list