[ovs-dev] [PATCH V2 3/4] tc: Add header rewrite using tc pedit action

Roi Dayan roid at mellanox.com
Wed Oct 25 11:24:15 UTC 2017



On 27/09/2017 12:08, Simon Horman wrote:
> On Mon, Sep 25, 2017 at 04:31:42PM +0300, Paul Blakey wrote:
>>
>>
>> On 18/09/2017 18:01, Simon Horman wrote:
>>> On Mon, Sep 18, 2017 at 07:16:03AM +0300, Roi Dayan wrote:
>>>> From: Paul Blakey <paulb at mellanox.com>
>>>>
>>>> To be later used to implement ovs action set offloading.
>>>>
>>>> Signed-off-by: Paul Blakey <paulb at mellanox.com>
>>>> Reviewed-by: Roi Dayan <roid at mellanox.com>
>>>> ---
>>>>   lib/tc.c | 372 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>>>   lib/tc.h |  16 +++
>>>>   2 files changed, 385 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/lib/tc.c b/lib/tc.c
>>>> index c9cada2..743b2ee 100644
>>>> --- a/lib/tc.c
>>>> +++ b/lib/tc.c
>>>> @@ -21,8 +21,10 @@
>>>>   #include <errno.h>
>>>>   #include <linux/if_ether.h>
>>>>   #include <linux/rtnetlink.h>
>>>> +#include <linux/tc_act/tc_csum.h>
>>>>   #include <linux/tc_act/tc_gact.h>
>>>>   #include <linux/tc_act/tc_mirred.h>
>>>> +#include <linux/tc_act/tc_pedit.h>
>>>>   #include <linux/tc_act/tc_tunnel_key.h>
>>>>   #include <linux/tc_act/tc_vlan.h>
>>>>   #include <linux/gen_stats.h>
>>>> @@ -33,11 +35,14 @@
>>>>   #include "netlink-socket.h"
>>>>   #include "netlink.h"
>>>>   #include "openvswitch/ofpbuf.h"
>>>> +#include "openvswitch/util.h"
>>>>   #include "openvswitch/vlog.h"
>>>>   #include "packets.h"
>>>>   #include "timeval.h"
>>>>   #include "unaligned.h"
>>>> +#define MAX_PEDIT_OFFSETS 8
>>>
>>> Why 8?
>> We don't expect anything more right now (ipv6 src/dst rewrite requires 8
>> pedits iirc). I can't think of a larger use case, maybe ipv6 + macs if
>> that's makes sens. do you suggest we increase it? to what?
> 
> It seems strange to me to place a somewhat arbitrary small limit
> when none exists in the pedit API being used. I would at prefer if
> it was at least a bigger, say 16 or 32.


Hi Simon,

Sorry for the late reply due to holidays and vacations.
Me & Paul going to go over this and do the fixes needed and
also rebase over latest master and run tests again.

I'll answer what I'm more familiar with now and Paul will continue.
The 8 here is too low and you right. We used this definition
for allocation of the pedit keys on the stack in
nl_msg_put_flower_rewrite_pedits()

It was for convenience instead of calculating the maximum possible
keys that could exists and allocating it there and freeing it at
the end.

Increasing it to 32 is probably more than enough and wont waste much.


> 
>>>>   VLOG_DEFINE_THIS_MODULE(tc);
>>>>   static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
>>>> @@ -50,6 +55,82 @@ enum tc_offload_policy {
>>>>   static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
>>>> +struct tc_pedit_key_ex {
>>>> +    enum pedit_header_type htype;
>>>> +    enum pedit_cmd cmd;
>>>> +};
>>>> +
>>>> +struct flower_key_to_pedit {
>>>> +    enum pedit_header_type htype;
>>>> +    int flower_offset;
>>>> +    int offset;
>>>> +    int size;
>>>> +};
>>>> +
>>>> +static struct flower_key_to_pedit flower_pedit_map[] = {
>>>> +    {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
>>>> +        12,
>>>> +        offsetof(struct tc_flower_key, ipv4.ipv4_src),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
>>>> +        16,
>>>> +        offsetof(struct tc_flower_key, ipv4.ipv4_dst),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
>>>> +        8,
>>>> +        offsetof(struct tc_flower_key, ipv4.rewrite_ttl),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
>>>> +        8,
>>>> +        offsetof(struct tc_flower_key, ipv6.ipv6_src),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
>>>> +        24,
>>>> +        offsetof(struct tc_flower_key, ipv6.ipv6_dst),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
>>>> +        6,
>>>> +        offsetof(struct tc_flower_key, src_mac),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, src_mac)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
>>>> +        0,
>>>> +        offsetof(struct tc_flower_key, dst_mac),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, dst_mac)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
>>>> +        12,
>>>> +        offsetof(struct tc_flower_key, eth_type),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, eth_type)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
>>>> +        0,
>>>> +        offsetof(struct tc_flower_key, tcp_src),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, tcp_src)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
>>>> +        2,
>>>> +        offsetof(struct tc_flower_key, tcp_dst),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, tcp_dst)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
>>>> +        0,
>>>> +        offsetof(struct tc_flower_key, udp_src),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, udp_src)
>>>> +    }, {
>>>> +        TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
>>>> +        2,
>>>> +        offsetof(struct tc_flower_key, udp_dst),
>>>> +        MEMBER_SIZEOF(struct tc_flower_key, udp_dst)
>>>> +    },
>>>> +};
>>>> +
>>>>   struct tcmsg *
>>>>   tc_make_request(int ifindex, int type, unsigned int flags,
>>>>                   struct ofpbuf *request)
>>>> @@ -365,6 +446,96 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) {
>>>>       }
>>>>   }
>>>> +static const struct nl_policy pedit_policy[] = {
>>>> +            [TCA_PEDIT_PARMS_EX] = { .type = NL_A_UNSPEC,
>>>> +                                     .min_len = sizeof(struct tc_pedit),
>>>> +                                     .optional = false, },
>>>> +            [TCA_PEDIT_KEYS_EX]   = { .type = NL_A_NESTED,
>>>> +                                      .optional = false, },
>>>> +};
>>>> +
>>>> +static int
>>>> +nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
>>>> +{
>>>> +    struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
>>>> +    const struct tc_pedit *pe;
>>>> +    const struct tc_pedit_key *keys;
>>>> +    const struct nlattr *nla, *keys_ex, *ex_type;
>>>> +    const void *keys_attr;
>>>> +    char *rewrite_key = (void *) &flower->rewrite.key;
>>>> +    char *rewrite_mask = (void *) &flower->rewrite.mask;
>>>> +    size_t keys_ex_size, left;
>>>> +    int type, i = 0;
>>>> +
>>>> +    if (!nl_parse_nested(options, pedit_policy, pe_attrs,
>>>> +                         ARRAY_SIZE(pedit_policy))) {
>>>> +        VLOG_ERR_RL(&error_rl, "failed to parse pedit action options");
>>>> +        return EPROTO;
>>>> +    }
>>>> +
>>>> +    pe = nl_attr_get_unspec(pe_attrs[TCA_PEDIT_PARMS_EX], sizeof *pe);
>>>> +    keys = pe->keys;
>>>> +    keys_attr = pe_attrs[TCA_PEDIT_KEYS_EX];
>>>> +    keys_ex = nl_attr_get(keys_attr);
>>>> +    keys_ex_size = nl_attr_get_size(keys_attr);
>>>> +
>>>> +    NL_ATTR_FOR_EACH (nla, left, keys_ex, keys_ex_size) {
>>>> +        if (i >= pe->nkeys) {
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        if (nl_attr_type(nla) == TCA_PEDIT_KEY_EX) {
>>>> +            ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE);
>>>> +            type = nl_attr_get_u16(ex_type);
>>>> +
>>>> +            for (int j = 0; j < ARRAY_SIZE(flower_pedit_map); j++) {
>>>> +                struct flower_key_to_pedit *m = &flower_pedit_map[j];
>>>> +                int flower_off = m->flower_offset;
>>>> +                int sz = m->size;
>>>> +                int mf = m->offset;
>>>> +
>>>> +                if (m->htype != type) {
>>>> +                   continue;
>>>> +                }
>>>> +
>>>> +                /* check overlap between current pedit key, which is always
>>>> +                 * 4 bytes (range [off, off + 3]), and a map entry in
>>>> +                 * flower_pedit_map (range [mf, mf + sz - 1]) */
>>>> +                if ((keys->off >= mf && keys->off < mf + sz)
>>>> +                    || (keys->off + 3 >= mf && keys->off + 3 < mf + sz)) {
>>>> +                    int diff = flower_off + (keys->off - mf);
>>>> +                    uint32_t *dst = (void *) (rewrite_key + diff);
>>>> +                    uint32_t *dst_m = (void *) (rewrite_mask + diff);
>>>> +                    uint32_t mask = ~(keys->mask);
>>>> +                    uint32_t zero_bits;
>>>> +
>>>> +                    if (keys->off < mf) {
>>>> +                        zero_bits = 8 * (mf - keys->off);
>>>> +                        mask &= UINT32_MAX << zero_bits;
>>>> +                    } else if (keys->off + 4 > mf + m->size) {
>>>> +                        zero_bits = 8 * (keys->off + 4 - mf - m->size);
>>>> +                        mask &= UINT32_MAX >> zero_bits;
>>>> +                    }
>>>> +
>>>> +                    *dst_m |= mask;
>>>> +                    *dst |= keys->val & mask;
>>>> +                }
>>>> +            }
>>>
>>> If I understand the above correctly it is designed to make
>>> pedit actions disjoint. If so, why is that necessary? >
>>
>> It's not, as a single flower key rewrite can be split to multiple pedit
>> actions it finds the overlap between a flower key and a pedit action, if
>> they do overlap it translates it to the correct offset and masks it out.
> 
> Thanks, understood.
> 
>>
>>>> +        } else {
>>>> +            VLOG_ERR_RL(&error_rl, "unable to parse legacy pedit type: %d",
>>>> +                        nl_attr_type(nla));
>>>> +            return EOPNOTSUPP;
>>>> +        }
>>>
>>> I think the code could exit early here as
>>> nl_msg_put_flower_rewrite_pedits() does below.
>>>
>>
>> Sorry, didn't understand. can you give an example?
>>
> 
> I meant something like this.
> 
>              if (nl_attr_type(nla) != TCA_PEDIT_KEY_EX) {
>                  VLOG_ERR_RL(...);
>                  return EOPNOTSUPP;
>              }

understood. we'll do that. thanks.

> 
> 	    /* Overlap detection code goes here */
> 
>>>
>>>> +
>>>> +        keys++;
>>>> +        i++;
>>>> +    }
>>>> +
>>>> +    flower->rewrite.rewrite = true;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   static const struct nl_policy tunnel_key_policy[] = {
>>>>       [TCA_TUNNEL_KEY_PARMS] = { .type = NL_A_UNSPEC,
>>>>                                  .min_len = sizeof(struct tc_tunnel_key),
>>>> @@ -608,6 +779,11 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
>>>>           nl_parse_act_vlan(act_options, flower);
>>>>       } else if (!strcmp(act_kind, "tunnel_key")) {
>>>>           nl_parse_act_tunnel_key(act_options, flower);
>>>> +    } else if (!strcmp(act_kind, "pedit")) {
>>>> +        nl_parse_act_pedit(act_options, flower);
>>>> +    } else if (!strcmp(act_kind, "csum")) {
>>>> +        /* not doing anything for now, ovs has an implicit csum recalculation
>>>> +         * with rewriting of packet headers (translating of pedit acts). */
>>>
>>> I wonder if the absence of a csum action when needed (by TC)
>>> should be treated as an error >
>>
>> Do you mean validating csum flags from each protocol and such (like in put)?
> 
> Yes, I think so.
> 
> In OvS (without TC acceleration) csum recalculation is implicit
> but with TC an explicit csum update action is required. I see
> in your code you are handling this by adding csum actions to TC actions
> when translating from OvS DPIF actions. And in the reverse (above) csum
> actions are simply ignored.
> 
> What I am wondering is if when translating TC actions to OvS DPIF actions
> you should track if the TC actions should include a csum rule because
> of other actions and treat its absence as an error.
> 
>>
>>>>       } else {
>>>>           VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
>>>>           return EINVAL;
>>>> @@ -809,6 +985,48 @@ tc_get_tc_cls_policy(enum tc_offload_policy policy)
>>>>   }
>>>>   static void
>>>> +nl_msg_put_act_csum(struct ofpbuf *request, uint32_t flags)
>>>> +{
>>>> +    size_t offset;
>>>> +
>>>> +    nl_msg_put_string(request, TCA_ACT_KIND, "csum");
>>>> +    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
>>>> +    {
>>>> +        struct tc_csum parm = { .action = TC_ACT_PIPE,
>>>> +                                .update_flags = flags };
>>>> +
>>>> +        nl_msg_put_unspec(request, TCA_CSUM_PARMS, &parm, sizeof parm);
>>>> +    }
>>>> +    nl_msg_end_nested(request, offset);
>>>> +}
>>>> +
>>>> +static void
>>>> +nl_msg_put_act_pedit(struct ofpbuf *request, struct tc_pedit *parm,
>>>> +                     struct tc_pedit_key_ex *ex)
>>>> +{
>>>> +    size_t ksize = sizeof *parm + (parm->nkeys * sizeof(struct tc_pedit_key));
>>>
>>> Are there unnecessary () on the line above?
>> No, I'll remove them.
>>
>>>
>>>> +    size_t offset, offset_keys_ex, offset_key;
>>>> +    int i;
>>>> +
>>>> +    nl_msg_put_string(request, TCA_ACT_KIND, "pedit");
>>>> +    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
>>>> +    {
>>>> +        parm->action = TC_ACT_PIPE;
>>>> +
>>>> +        nl_msg_put_unspec(request, TCA_PEDIT_PARMS_EX, parm, ksize);
>>>> +        offset_keys_ex = nl_msg_start_nested(request, TCA_PEDIT_KEYS_EX);
>>>> +        for (i = 0; i < parm->nkeys; i++, ex++) {
>>>> +            offset_key = nl_msg_start_nested(request, TCA_PEDIT_KEY_EX);
>>>> +            nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_HTYPE, ex->htype);
>>>> +            nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_CMD, ex->cmd);
>>>> +            nl_msg_end_nested(request, offset_key);
>>>> +        }
>>>> +        nl_msg_end_nested(request, offset_keys_ex);
>>>> +    }
>>>> +    nl_msg_end_nested(request, offset);
>>>> +}
>>>> +
>>>> +static void
>>>>   nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio)
>>>>   {
>>>>       size_t offset;
>>>> @@ -930,7 +1148,127 @@ nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>>>>       }
>>>>   }
>>>> +/* Given flower, a key_to_pedit map entry, calculates the rest,
>>>> + * where:
>>>> + *
>>>> + * mask, data - pointers of where read the first word of flower->key/mask.
>>>> + * current_offset - which offset to use for the first pedit action.
>>>> + * cnt - max pedits actions to use.
>>>> + * first_word_mask/last_word_mask - the mask to use for the first/last read
>>>> + * (as we read entire words). */
>>>>   static void
>>>> +calc_offsets(struct tc_flower *flower, struct flower_key_to_pedit *m,
>>>> +             int *cur_offset, int *cnt, uint32_t *last_word_mask,
>>>> +             uint32_t *first_word_mask, uint32_t **mask, uint32_t **data)
>>>> +{
>>>> +    int start_offset, max_offset, total_size;
>>>> +    int diff, right_zero_bits, left_zero_bits;
>>>> +    char *rewrite_key = (void *) &flower->rewrite.key;
>>>> +    char *rewrite_mask = (void *) &flower->rewrite.mask;
>>>> +
>>>> +    max_offset = m->offset + m->size;
>>>> +    start_offset = ROUND_DOWN(m->offset, 4);
>>>> +    diff = m->offset - start_offset;
>>>> +    total_size = max_offset - start_offset;
>>>> +    right_zero_bits = 8 * (4 - (max_offset % 4));
>>>> +    left_zero_bits = 8 * (m->offset - start_offset);
>>>> +
>>>> +    *cur_offset = start_offset;
>>>> +    *cnt = (total_size / 4) + (total_size % 4 ? 1 : 0);
>>>> +    *last_word_mask = UINT32_MAX >> right_zero_bits;
>>>> +    *first_word_mask = UINT32_MAX << left_zero_bits;
>>>> +    *data = (void *) (rewrite_key + m->flower_offset - diff);
>>>> +    *mask = (void *) (rewrite_mask + m->flower_offset - diff);
>>>
>>> The type of *data and *mask is uint32_t *.
>>> Why not cast to that type?
>>
>> It's to avoid warning of increasing pointer alignment (-Wcast-align).
> 
> It sounds like the warning should be addressed rather than overridden using
> a cast.
> 
>>>
>>>> +}
>>>> +
>>>> +static inline void
>>>> +csum_update_flag(struct tc_flower *flower,
>>>> +                 enum pedit_header_type htype) {
>>>
>>> I think the above two lines could be one.
>>>
>>>> +    if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4) {
>>>
>>> A case statement might be nicer here.
>>
>> Right, thanks.
>>
>>>
>>>> +        flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
>>>> +    }
>>>> +    if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
>>>> +        || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
>>>> +        || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
>>>> +        || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_UDP) {
>>>> +        if (flower->key.ip_proto == IPPROTO_TCP) {
>>>> +            flower->mask.ip_proto = UINT8_MAX;
>>>
>>> What if the mask was not UINT8_MAX to start with?
>>> Doesn't this create a different flow?
>>
>> This is so the checksum will be strict (not setting/recalc udp checksum on
>> non udp packets). It creates a more specific flow, a subset of the original.
>> This is fine as datapath is free to ignore a mask, which is the same as a
>> full mask we've set.
> 
> I'm not sure that I understand why its fine for the datapath to ignore the
> mask.

I'll try to explain.
When we want the HW to recalc the csum we need to also do matching on
it. We do that for TCP, UDP, ICMP.

> 
>>>> +            flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
>>>> +        } else if (flower->key.ip_proto == IPPROTO_UDP) {
>>>> +            flower->mask.ip_proto = UINT8_MAX;
>>>> +            flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
>>>> +        } else if (flower->key.ip_proto == IPPROTO_ICMP
>>>> +                   || flower->key.ip_proto == IPPROTO_ICMPV6) {
>>>> +            flower->mask.ip_proto = UINT8_MAX;
>>>> +            flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_ICMP;
>>>> +        }
>>>> +    }
>>>> +}
>>>> +
>>>> +static int
>>>> +nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
>>>> +                                 struct tc_flower *flower)
>>>> +{
>>>> +    struct {
>>>> +        struct tc_pedit sel;
>>>> +        struct tc_pedit_key keys[MAX_PEDIT_OFFSETS];
>>>> +        struct tc_pedit_key_ex keys_ex[MAX_PEDIT_OFFSETS];
>>>> +    } sel = {
>>>> +        .sel = {
>>>> +            .nkeys = 0
>>>> +        }
>>>> +    };
>>>> +    int i, j;
>>>> +
>>>> +    for (i = 0; i < ARRAY_SIZE(flower_pedit_map); i++) {
>>>> +        struct flower_key_to_pedit *m = &flower_pedit_map[i];
>>>> +        struct tc_pedit_key *pedit_key = NULL;
>>>> +        struct tc_pedit_key_ex *pedit_key_ex = NULL;
>>>> +        uint32_t *mask, *data, first_word_mask, last_word_mask;
>>>> +        int cnt = 0, cur_offset = 0;
>>>> +
>>>> +        if (!m->size) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        calc_offsets(flower, m, &cur_offset, &cnt, &last_word_mask,
>>>> +                     &first_word_mask, &mask, &data);
>>>> +
>>>> +        for (j = 0; j < cnt; j++,  mask++, data++, cur_offset += 4) {
>>>> +            uint32_t mask_word = *mask;
>>>> +
>>>> +            if (j == 0) {
>>>> +                mask_word &= first_word_mask;
>>>> +            }
>>>> +            if (j == cnt - 1) {
>>>> +                mask_word &= last_word_mask;
>>>> +            }
>>>> +            if (!mask_word) {
>>>> +                continue;
>>>> +            }
>>>> +            if (sel.sel.nkeys == MAX_PEDIT_OFFSETS) {
>>>> +                VLOG_WARN_RL(&error_rl, "reached too many pedit offsets: %d",
>>>> +                             MAX_PEDIT_OFFSETS);
>>>> +                return EOPNOTSUPP;
>>>> +            }
>>>> +
>>>> +            pedit_key = &sel.keys[sel.sel.nkeys];
>>>> +            pedit_key_ex = &sel.keys_ex[sel.sel.nkeys];
>>>> +            pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_SET;
>>>> +            pedit_key_ex->htype = m->htype;
>>>> +            pedit_key->off = cur_offset;
>>>> +            pedit_key->mask = ~mask_word;
>>>> +            pedit_key->val = *data & mask_word;
>>>> +            sel.sel.nkeys++;
>>>> +            csum_update_flag(flower, m->htype);
>>>> +        }
>>>> +    }
>>>> +    nl_msg_put_act_pedit(request, &sel.sel, sel.keys_ex);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int
>>>>   nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
>>>>   {
>>>>       size_t offset;
>>>> @@ -939,7 +1277,20 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
>>>>       offset = nl_msg_start_nested(request, TCA_FLOWER_ACT);
>>>>       {
>>>>           uint16_t act_index = 1;
>>>> +        int error;
>>>> +        if (flower->rewrite.rewrite) {
>>>> +            act_offset = nl_msg_start_nested(request, act_index++);
>>>> +            error = nl_msg_put_flower_rewrite_pedits(request, flower);
>>>> +            if (error) {
>>>> +                return error;
>>>> +            }
>>>> +            nl_msg_end_nested(request, act_offset);
>>>> +
>>>> +            act_offset = nl_msg_start_nested(request, act_index++);
>>>> +            nl_msg_put_act_csum(request, flower->csum_update_flags);
>>>> +            nl_msg_end_nested(request, act_offset);
>>>> +        }
>>>>           if (flower->set.set) {
>>>>               act_offset = nl_msg_start_nested(request, act_index++);
>>>>               nl_msg_put_act_tunnel_key_set(request, flower->set.id,
>>>> @@ -980,6 +1331,8 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
>>>>           }
>>>>       }
>>>>       nl_msg_end_nested(request, offset);
>>>> +
>>>> +    return 0;
>>>>   }
>>>>   static void
>>>> @@ -1021,11 +1374,19 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower)
>>>>       nl_msg_put_masked_value(request, type, type##_MASK, &flower->key.member, \
>>>>                               &flower->mask.member, sizeof flower->key.member)
>>>> -static void
>>>> +static int
>>>>   nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
>>>>   {
>>>> +
>>>>       uint16_t host_eth_type = ntohs(flower->key.eth_type);
>>>>       bool is_vlan = (host_eth_type == ETH_TYPE_VLAN);
>>>> +    int err;
>>>> +
>>>> +    /* need to parse acts first as some acts require changing the matching */
>>>
>>> This seems strange to me.
>>
>> from the strict (normalized?) header checksum.
> 
> I think this code relates to setting flower->key.ip_proto == IPPROTO_TCP
> above. Is that correct. Are there any other cases?

yes and also for UDP and ICMP. as explained above.
So going over the acts in header rewrite case we might want to signal 
the HW recalc csum and we need matching on those fields that require new
csum so the matching mask being updated. this is why we parse the acts
first.


> 
>>
>>>
>>>> +    err  = nl_msg_put_flower_acts(request, flower);
>>>> +    if (err) {
>>>> +        return err;
>>>> +    }
>>>>       if (is_vlan) {
>>>>           host_eth_type = ntohs(flower->key.encap_eth_type);
>>>> @@ -1083,7 +1444,7 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
>>>>           nl_msg_put_flower_tunnel(request, flower);
>>>>       }
>>>> -    nl_msg_put_flower_acts(request, flower);
>>>> +    return 0;
>>>>   }
>>>>   int
>>>> @@ -1106,7 +1467,12 @@ tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
>>>>       nl_msg_put_string(&request, TCA_KIND, "flower");
>>>>       basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
>>>>       {
>>>> -        nl_msg_put_flower_options(&request, flower);
>>>> +        error = nl_msg_put_flower_options(&request, flower);
>>>> +
>>>> +        if (error) {
>>>> +            ofpbuf_uninit(&request);
>>>> +            return error;
>>>> +        }
>>>>       }
>>>>       nl_msg_end_nested(&request, basic_offset);
>>>> diff --git a/lib/tc.h b/lib/tc.h
>>>> index 6c69b79..7876051 100644
>>>> --- a/lib/tc.h
>>>> +++ b/lib/tc.h
>>>> @@ -96,6 +96,7 @@ struct tc_flower_key {
>>>>       struct {
>>>>           ovs_be32 ipv4_src;
>>>>           ovs_be32 ipv4_dst;
>>>> +        uint8_t rewrite_ttl;
>>>>       } ipv4;
>>>>       struct {
>>>>           struct in6_addr ipv6_src;
>>>> @@ -120,6 +121,14 @@ struct tc_flower {
>>>>       uint64_t lastused;
>>>>       struct {
>>>> +        bool rewrite;
>>>> +        struct tc_flower_key key;
>>>> +        struct tc_flower_key mask;
>>>> +    } rewrite;
>>>> +
>>>> +    uint32_t csum_update_flags;
>>>> +
>>>> +    struct {
>>>>           bool set;
>>>>           ovs_be64 id;
>>>>           ovs_be16 tp_src;
>>>> @@ -152,6 +161,13 @@ struct tc_flower {
>>>>       struct tc_cookie act_cookie;
>>>>   };
>>>> +/* assert that if we overflow with a masked write of uint32_t to the last byte
>>>> + * of flower.rewrite we overflow inside struct flower.
>>>> + * shouldn't happen unless someone moves rewrite to the end of flower */
>>>> +BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite)
>>>> +                  + MEMBER_SIZEOF(struct tc_flower, rewrite)
>>>> +                  + sizeof(uint32_t) - 2 < sizeof(struct tc_flower));
>>>> +
>>>>   int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
>>>>                         struct tc_flower *flower);
>>>>   int tc_del_filter(int ifindex, int prio, int handle);
>>>> -- 
>>>> 2.7.5
>>>>


More information about the dev mailing list