[ovs-dev] [recirculation 3/3] datapath: add recirc action

Andy Zhou azhou at nicira.com
Wed Apr 16 00:07:52 UTC 2014


On Tue, Apr 15, 2014 at 7:14 AM, Pravin Shelar <pshelar at nicira.com> wrote:
> On Sat, Apr 12, 2014 at 3:30 AM, Andy Zhou <azhou at nicira.com> wrote:
>> Implements Linux kernel datapath recirc action.  Recirc action allows
>> kernel data patch reprocess the packet as if it is received from its
>> original input port, with a recirc_id added to its flow key.
>>
>> The recirc action follows a sub routine call model. The packet change
>> by actions during recirculation is invisible across a recirculation;
>> any action following the recirc action will process the same packet
>> prior to the recirc action.
>>
>> Signed-off-by: Andy Zhou <azhou at nicira.com>
>> ---
>>  datapath/actions.c      |   54 ++++++++++++++++++++++++++++++++++++++++++++++-
>>  datapath/datapath.c     |   37 +++++++++++++++++++-------------
>>  datapath/datapath.h     |    3 ++-
>>  datapath/flow_netlink.c |   31 ++++++++++++++++++++++++++-
>>  4 files changed, 107 insertions(+), 18 deletions(-)
>>
>> diff --git a/datapath/actions.c b/datapath/actions.c
>> index cb239c8..c833c69 100644
>> --- a/datapath/actions.c
>> +++ b/datapath/actions.c
>> @@ -1,5 +1,5 @@
>>  /*
>> - * Copyright (c) 2007-2013 Nicira, Inc.
>> + * Copyright (c) 2007-2014 Nicira, Inc.
>>   *
>>   * This program is free software; you can redistribute it and/or
>>   * modify it under the terms of version 2 of the GNU General Public
>> @@ -521,6 +521,34 @@ static int execute_set_action(struct sk_buff *skb,
>>         return err;
>>  }
>>
>> +static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
>> +                                const struct nlattr *recirc_attr,
>> +                                uint16_t in_port, uint32_t dp_hash)
>> +{
>> +       struct sw_flow_key recirc_key;
>> +       struct vport *p;
>> +       int err;
>> +
>> +       err = ovs_flow_extract(skb, in_port, &recirc_key);
>> +       if (err)
>> +               return err;
>> +
>> +       recirc_key.dp_hash = dp_hash;
>> +       recirc_key.recirc_id = nla_get_u32(recirc_attr);
>> +       OVS_CB(skb)->pkt_key = &recirc_key;
>> +       OVS_CB(skb)->flow = NULL;
>> +
> Do we need to reset flow here?
>
>> +       p = ovs_vport_rcu(dp, in_port);
>> +       if (unlikely(!p)) {
>> +               kfree_skb(skb);
>> +               return -ENODEV;
>> +       }
>> +
>> +       ovs_dp_process_packet_with_key(p, skb);
>> +
> Can we avoid flow-extract and vport lookup. it does add per packet
> processing cost.
>
>> +       return 0;
>> +}
>> +
>>  /* Execute a list of actions against 'skb'. */
>>  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>>                         const struct nlattr *attr, int len, bool keep_skb)
>> @@ -565,12 +593,36 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>>                         err = pop_vlan(skb);
>>                         break;
>>
>> +               case OVS_ACTION_ATTR_RECIRC: {
>> +                       struct sk_buff *recirc_skb;
>> +                       const bool last_action = (a->nla_len == rem);
>> +                       uint16_t in_port = OVS_CB(skb)->pkt_key->phy.in_port;
>> +                       uint32_t dp_hash = OVS_CB(skb)->pkt_key->dp_hash;
>> +
>> +                       if (!last_action || keep_skb)
>> +                               recirc_skb = skb_clone(skb, GFP_ATOMIC);
>> +                       else
>> +                               recirc_skb = skb;
>> +
>> +                       err = execute_recirc(dp, recirc_skb, a, in_port,
>> +                                       dp_hash);
>> +
> Why are hash and in-port are explicitly passed to execute_recirc()?
we need to pass dp_hash value to recirc_flow.
I can remove the in_port.

>
>> +                       /* Return directly if recirc is the last action
>> +                        * or err.  */
>> +                       if (last_action || err)
>> +                               return err;
>> +
>> +                       break;
>> +               }
>> +
>>                 case OVS_ACTION_ATTR_SET:
>>                         err = execute_set_action(skb, nla_data(a));
>>                         break;
>>
>>                 case OVS_ACTION_ATTR_SAMPLE:
>>                         err = sample(dp, skb, a);
>> +                       if (unlikely(err)) /* skb already freed. */
>> +                               return err;
>>                         break;
> good catch.
> This is bug fix and need to be back-ported. Can you send separate patch?
>
>>                 }
>>
>> diff --git a/datapath/datapath.c b/datapath/datapath.c
>> index 25edd7d..dab4a6b 100644
>> --- a/datapath/datapath.c
>> +++ b/datapath/datapath.c
>> @@ -240,33 +240,24 @@ void ovs_dp_detach_port(struct vport *p)
>>         ovs_vport_del(p);
>>  }
>>
>> -/* Must be called with rcu_read_lock. */
>> -void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
>> +void ovs_dp_process_packet_with_key(const struct vport *p, struct sk_buff *skb)
>>  {
>>         struct datapath *dp = p->dp;
>>         struct sw_flow *flow;
>>         struct dp_stats_percpu *stats;
>> -       struct sw_flow_key key;
>> +       struct sw_flow_key *key = OVS_CB(skb)->pkt_key;
>>         u64 *stats_counter;
>>         u32 n_mask_hit;
>> -       int error;
>>
>>         stats = this_cpu_ptr(dp->stats_percpu);
>>
>> -       /* Extract flow from 'skb' into 'key'. */
>> -       error = ovs_flow_extract(skb, p->port_no, &key);
>> -       if (unlikely(error)) {
>> -               kfree_skb(skb);
>> -               return;
>> -       }
>> -
>>         /* Look up flow. */
>> -       flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
>> +       flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
>>         if (unlikely(!flow)) {
>>                 struct dp_upcall_info upcall;
>>
>>                 upcall.cmd = OVS_PACKET_CMD_MISS;
>> -               upcall.key = &key;
>> +               upcall.key = key;
>>                 upcall.userdata = NULL;
>>                 upcall.portid = p->upcall_portid;
>>                 ovs_dp_upcall(dp, skb, &upcall);
>> @@ -276,9 +267,8 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
>>         }
>>
>>         OVS_CB(skb)->flow = flow;
>> -       OVS_CB(skb)->pkt_key = &key;
>>
>> -       ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
>> +       ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb);
>>         ovs_execute_actions(dp, skb);
>>         stats_counter = &stats->n_hit;
>>
>> @@ -290,6 +280,23 @@ out:
>>         u64_stats_update_end(&stats->sync);
>>  }
>>
>> +/* Must be called with rcu_read_lock. */
>> +void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
>> +{
>> +       int error;
>> +       struct sw_flow_key key;
>> +
>> +       /* Extract flow from 'skb' into 'key'. */
>> +       error = ovs_flow_extract(skb, p->port_no, &key);
>> +       if (unlikely(error)) {
>> +               kfree_skb(skb);
>> +               return;
>> +       }
>> +
>> +       OVS_CB(skb)->pkt_key = &key;
>> +       ovs_dp_process_packet_with_key(p, skb);
>> +}
> Can you pass key ovs_dp_process_packet_with_key(), so that we can only
> one function can set OVS_CB flow and key.
>
>> +
>>  int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
>>                   const struct dp_upcall_info *upcall_info)
>>  {
>> diff --git a/datapath/datapath.h b/datapath/datapath.h
>> index 40e0f90..d3b06ee 100644
>> --- a/datapath/datapath.h
>> +++ b/datapath/datapath.h
>> @@ -1,5 +1,5 @@
>>  /*
>> - * Copyright (c) 2007-2012 Nicira, Inc.
>> + * Copyright (c) 2007-2014 Nicira, Inc.
>>   *
>>   * This program is free software; you can redistribute it and/or
>>   * modify it under the terms of version 2 of the GNU General Public
>> @@ -188,6 +188,7 @@ extern struct genl_family dp_vport_genl_family;
>>  extern struct genl_multicast_group ovs_dp_vport_multicast_group;
>>
>>  void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
>> +void ovs_dp_process_packet_with_key(const struct vport *, struct sk_buff *);
>>  void ovs_dp_detach_port(struct vport *);
>>  int ovs_dp_upcall(struct datapath *, struct sk_buff *,
>>                   const struct dp_upcall_info *);
>> diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
>> index e85326b..f5249ee 100644
>> --- a/datapath/flow_netlink.c
>> +++ b/datapath/flow_netlink.c
>> @@ -129,7 +129,8 @@ static bool match_validate(const struct sw_flow_match *match,
>>         /* Always allowed mask fields. */
>>         mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
>>                        | (1ULL << OVS_KEY_ATTR_IN_PORT)
>> -                      | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
>> +                      | (1ULL << OVS_KEY_ATTR_ETHERTYPE)
>> +                      | (1ULL << OVS_KEY_ATTR_RECIRC_ID));
>>
>>         /* Check key attributes. */
>>         if (match->key->dp_hash) {
>> @@ -258,6 +259,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
>>         [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
>>         [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
>>         [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
>> +       [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
>>         [OVS_KEY_ATTR_TUNNEL] = -1,
>>  };
>>
>> @@ -474,6 +476,23 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
>>                 *attrs &= ~(1ULL << OVS_KEY_ATTR_DP_HASH);
>>         }
>>
>> +       if (*attrs & (1ULL << OVS_KEY_ATTR_RECIRC_ID)) {
>> +               u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
>> +
>> +               if (is_mask && (recirc_id > 0 && recirc_id < UINT_MAX)) {
>> +                       OVS_NLERR("Reicrc_id mask is neither wildcard nor exact match\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
>> +               *attrs &= ~(1ULL << OVS_KEY_ATTR_RECIRC_ID);
>> +       }
>> +
>> +       if (is_mask) {
>> +               /* Always exact match recirc_id. */
>> +               SW_FLOW_KEY_PUT(match, recirc_id, UINT_MAX, is_mask);
>> +       }
> I am not sure why is this done unconditionally? This does increases
> range for flow match.
Missing recirc_id attribute is interpreted as exact match of 0.  We
can may be move recirc_id field
below tunnel header?
>> +
>>         if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
>>                 SW_FLOW_KEY_PUT(match, phy.priority,
>>                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
>> @@ -879,6 +898,7 @@ int ovs_nla_get_flow_metadata(struct sw_flow *flow,
>>         flow->key.phy.priority = 0;
>>         flow->key.phy.skb_mark = 0;
>>         flow->key.dp_hash = 0;
>> +       flow->key.recirc_id = 0;
>
> Comment need a update.
Which comment?
>>         memset(tun_key, 0, sizeof(flow->key.tun_key));
>>
>>         err = parse_flow_nlattrs(attr, a, &attrs);
>> @@ -902,10 +922,15 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
>>         struct nlattr *nla, *encap;
>>         bool is_mask = (swkey != output);
>>
>> +       /* dp_hash and recric_id are only valid if they are nonzero. */
>>         if (swkey->dp_hash)
>>                 if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->dp_hash))
>>                         goto nla_put_failure;
>>
>> +       if (swkey->recirc_id)
>> +               if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
>> +                       goto nla_put_failure;
>> +
> Can you use && rather than nested if statement?
>
>>         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
>>                 goto nla_put_failure;
>>
>> @@ -1442,6 +1467,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
>>                 /* Expected argument lengths, (u32)-1 for variable length. */
>>                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
>>                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
>> +                       [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
>>                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
>>                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
>>                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
>> @@ -1488,6 +1514,9 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
>>                                 return -EINVAL;
>>                         break;
>>
>> +               case OVS_ACTION_ATTR_RECIRC:
>> +                       break;
>> +
>>                 case OVS_ACTION_ATTR_SET:
>>                         err = validate_set(a, key, sfa, &skip_copy);
>>                         if (err)
>> --
>> 1.7.9.5
>>
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev



More information about the dev mailing list