[ovs-dev] [PATCH] datapath: Optimize recirc action.

Pravin Shelar pshelar at nicira.com
Thu Aug 7 21:31:58 UTC 2014


On Thu, Aug 7, 2014 at 1:27 PM, Jarno Rajahalme <jrajahalme at nicira.com> wrote:
> Most of the time we do not need the updated key (i.e., when there is no recirc action). Do you think it would be worthwhile to only update the key when needed, e.g. by adding a “bool update_key;” member to struct sw_flow_actions?
>
It is good idea. I am not sure about performance benefit we will see
this. We can look at it later if we find issue with current approach.

> More comments below,
>
>   Jarno
>
> On Aug 7, 2014, at 12:32 PM, Pravin B Shelar <pshelar at nicira.com> wrote:
>
>> OVS need to flow key for flow lookup in recic action. OVS
>> does key extract in recic action. Most of cases we could
>> use OVS_CB packet key directly and can avoid packet flow key
>> extract. SET action we can update flow-key along with packet
>> to keep it consistent. But there are some action like MPLS
>> pop which forces OVS to do flow-extract. In such cases we
>> can mark flow key as invalid so that subsequent recirc
>> action can do full flow extract.
>>
>> Signed-off-by: Pravin B Shelar <pshelar at nicira.com>
>> ---
>> datapath/actions.c | 197 +++++++++++++++++++++++++++++++++++++++++++----------
>> 1 file changed, 160 insertions(+), 37 deletions(-)
>>
>> diff --git a/datapath/actions.c b/datapath/actions.c
>> index efc64f1..3c004a8 100644
>> --- a/datapath/actions.c
>> +++ b/datapath/actions.c
>> @@ -40,6 +40,90 @@
>> #include "vlan.h"
>> #include "vport.h"
>>
>> +static void flow_key_set_priority(struct sk_buff *skb, u32 priority)
>> +{
>> +     OVS_CB(skb)->pkt_key->phy.priority = priority;
>> +}
>> +
>> +static void flow_key_set_skb_mark(struct sk_buff *skb, u32 skb_mark)
>> +{
>> +     OVS_CB(skb)->pkt_key->phy.skb_mark = skb_mark;
>> +}
>> +
>> +static void flow_key_set_eth_src(struct sk_buff *skb, const u8 addr[])
>> +{
>> +     ether_addr_copy(OVS_CB(skb)->pkt_key->eth.src, addr);
>> +}
>> +
>> +static void flow_key_set_eth_dst(struct sk_buff *skb, const u8 addr[])
>> +{
>> +     ether_addr_copy(OVS_CB(skb)->pkt_key->eth.dst, addr);
>> +}
>> +
>> +static void flow_key_set_vlan_tci(struct sk_buff *skb, __be16 tci)
>> +{
>> +     OVS_CB(skb)->pkt_key->eth.tci = tci;
>> +}
>> +
>> +static void flow_key_set_ipv4_src(struct sk_buff *skb, __be32 addr)
>> +{
>> +     OVS_CB(skb)->pkt_key->ipv4.addr.src = addr;
>> +}
>> +
>> +static void flow_key_set_ipv4_dst(struct sk_buff *skb, __be32 addr)
>> +{
>> +     OVS_CB(skb)->pkt_key->ipv4.addr.src = addr;
>> +}
>> +
>> +static void flow_key_set_ipv4_tos(struct sk_buff *skb, const struct iphdr *nh)
>> +{
>> +     OVS_CB(skb)->pkt_key->ip.tos = nh->tos;
>> +}
>> +
>> +static void flow_key_set_ip_ttl(struct sk_buff *skb, u8 ttl)
>> +{
>> +     OVS_CB(skb)->pkt_key->ip.ttl = ttl;
>> +}
>> +
>> +static void flow_key_set_ipv6_src(struct sk_buff *skb,
>> +                               const __be32 addr[4])
>> +{
>> +     memcpy(&OVS_CB(skb)->pkt_key->ipv6.addr.src, addr, sizeof(__be32[4]));
>> +}
>> +
>> +static void flow_key_set_ipv6_dst(struct sk_buff *skb,
>> +                               const __be32 addr[4])
>> +{
>> +     memcpy(&OVS_CB(skb)->pkt_key->ipv6.addr.dst, addr, sizeof(__be32[4]));
>> +}
>> +
>> +static void flow_key_set_ipv6_fl(struct sk_buff *skb,
>> +                              const struct ipv6hdr *nh)
>> +{
>> +     OVS_CB(skb)->pkt_key->ipv6.label = *(__be32 *)nh &
>> +                                        htonl(IPV6_FLOWINFO_FLOWLABEL);
>> +}
>> +
>> +static void flow_key_set_tp_src(struct sk_buff *skb, __be16 port)
>> +{
>> +     OVS_CB(skb)->pkt_key->tp.src = port;
>> +}
>> +
>> +static void flow_key_set_tp_dst(struct sk_buff *skb, __be16 port)
>> +{
>> +     OVS_CB(skb)->pkt_key->tp.dst = port;
>> +}
>> +
>> +static void invalidate_skb_flow_key(struct sk_buff *skb)
>> +{
>> +     OVS_CB(skb)->pkt_key->eth.type = 0;
>> +}
>> +
>> +static bool is_skb_flow_key_valid(struct sk_buff *skb)
>> +{
>> +     return !!OVS_CB(skb)->pkt_key->eth.type;
>> +}
>> +
>> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>>                             const struct nlattr *attr, int len);
>>
>> @@ -90,6 +174,7 @@ static int push_mpls(struct sk_buff *skb,
>>       if (!ovs_skb_get_inner_protocol(skb))
>>               ovs_skb_set_inner_protocol(skb, skb->protocol);
>>       skb->protocol = mpls->mpls_ethertype;
>> +     invalidate_skb_flow_key(skb);
>>       return 0;
>> }
>>
>> @@ -120,6 +205,7 @@ static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
>>       hdr->h_proto = ethertype;
>>       if (eth_p_mpls(skb->protocol))
>>               skb->protocol = ethertype;
>> +     invalidate_skb_flow_key(skb);
>>       return 0;
>> }
>>
>> @@ -139,7 +225,7 @@ static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
>>       }
>>
>>       *stack = *mpls_lse;
>> -
>> +     invalidate_skb_flow_key(skb);
>
> Updating the OVS_CB(skb)->pkt_key->mpls.top_lse should be done instead of invalidating the flow key.
>
ok.

>>       return 0;
>> }
>>
>> @@ -189,9 +275,12 @@ static int pop_vlan(struct sk_buff *skb)
>>       }
>>       /* move next vlan tag to hw accel tag */
>>       if (likely(skb->protocol != htons(ETH_P_8021Q) ||
>> -                skb->len < VLAN_ETH_HLEN))
>> +                skb->len < VLAN_ETH_HLEN)) {
>> +             flow_key_set_vlan_tci(skb, 0);
>>               return 0;
>> +     }
>>
>> +     invalidate_skb_flow_key(skb);
>>       err = __pop_vlan_tci(skb, &tci);
>>       if (unlikely(err))
>>               return err;
>
> In the latter case, where there is a next vlan tag, could you just flow_key_set_vlan_tci(skb, tci) or do we need to do full extraction again?
>

There could be multiple levels of vlan so we need full packet extract.

>> @@ -220,6 +309,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
>>
>>       }
>>       __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
>> +     invalidate_skb_flow_key(skb);
>
> At least in the case no vlan tag existed before, could we just set the tag in the key instead of invalidation?
>
ok.

>>       return 0;
>> }
>>
>> @@ -238,11 +328,13 @@ static int set_eth_addr(struct sk_buff *skb,
>>
>>       ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
>>
>> +     flow_key_set_eth_src(skb, eth_key->eth_src);
>> +     flow_key_set_eth_dst(skb, eth_key->eth_dst);
>>       return 0;
>> }
>>
>> static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
>> -                             __be32 *addr, __be32 new_addr)
>> +                     __be32 *addr, __be32 new_addr)
>> {
>>       int transport_len = skb->len - skb_transport_offset(skb);
>>
>> @@ -333,17 +425,25 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
>>
>>       nh = ip_hdr(skb);
>>
>> -     if (ipv4_key->ipv4_src != nh->saddr)
>> +     if (ipv4_key->ipv4_src != nh->saddr) {
>>               set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
>> +             flow_key_set_ipv4_src(skb, ipv4_key->ipv4_src);
>> +     }
>>
>> -     if (ipv4_key->ipv4_dst != nh->daddr)
>> +     if (ipv4_key->ipv4_dst != nh->daddr) {
>>               set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
>> +             flow_key_set_ipv4_dst(skb, ipv4_key->ipv4_dst);
>> +     }
>>
>> -     if (ipv4_key->ipv4_tos != nh->tos)
>> +     if (ipv4_key->ipv4_tos != nh->tos) {
>>               ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
>> +             flow_key_set_ipv4_tos(skb, nh);
>> +     }
>>
>> -     if (ipv4_key->ipv4_ttl != nh->ttl)
>> +     if (ipv4_key->ipv4_ttl != nh->ttl) {
>>               set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
>> +             flow_key_set_ip_ttl(skb, ipv4_key->ipv4_ttl);
>> +     }
>>
>>       return 0;
>> }
>> @@ -364,9 +464,11 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
>>       saddr = (__be32 *)&nh->saddr;
>>       daddr = (__be32 *)&nh->daddr;
>>
>> -     if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
>> +     if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
>>               set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
>>                             ipv6_key->ipv6_src, true);
>> +             flow_key_set_ipv6_src(skb, ipv6_key->ipv6_src);
>> +     }
>>
>>       if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
>>               unsigned int offset = 0;
>> @@ -380,12 +482,15 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
>>
>>               set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
>>                             ipv6_key->ipv6_dst, recalc_csum);
>> +             flow_key_set_ipv6_dst(skb, ipv6_key->ipv6_dst);
>>       }
>>
>>       set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
>
> Why not set the ip.tos in the flow key?
>
yes, I missed it.

>>       set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
>> -     nh->hop_limit = ipv6_key->ipv6_hlimit;
>> +     flow_key_set_ipv6_fl(skb, nh);
>>
>> +     nh->hop_limit = ipv6_key->ipv6_hlimit;
>> +     flow_key_set_ip_ttl(skb, ipv6_key->ipv6_hlimit);
>>       return 0;
>> }
>>
>> @@ -424,11 +529,15 @@ static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
>>               return err;
>>
>>       uh = udp_hdr(skb);
>> -     if (udp_port_key->udp_src != uh->source)
>> +     if (udp_port_key->udp_src != uh->source) {
>>               set_udp_port(skb, &uh->source, udp_port_key->udp_src);
>> +             flow_key_set_tp_src(skb, udp_port_key->udp_src);
>> +     }
>>
>> -     if (udp_port_key->udp_dst != uh->dest)
>> +     if (udp_port_key->udp_dst != uh->dest) {
>>               set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
>> +             flow_key_set_tp_dst(skb, udp_port_key->udp_dst);
>> +     }
>>
>>       return 0;
>> }
>> @@ -444,17 +553,21 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
>>               return err;
>>
>>       th = tcp_hdr(skb);
>> -     if (tcp_port_key->tcp_src != th->source)
>> +     if (tcp_port_key->tcp_src != th->source) {
>>               set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
>> +             flow_key_set_tp_src(skb, tcp_port_key->tcp_src);
>> +     }
>>
>> -     if (tcp_port_key->tcp_dst != th->dest)
>> +     if (tcp_port_key->tcp_dst != th->dest) {
>>               set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
>> +             flow_key_set_tp_dst(skb, tcp_port_key->tcp_dst);
>> +     }
>>
>>       return 0;
>> }
>>
>> static int set_sctp(struct sk_buff *skb,
>> -                  const struct ovs_key_sctp *sctp_port_key)
>> +                 const struct ovs_key_sctp *sctp_port_key)
>> {
>>       struct sctphdr *sh;
>>       int err;
>> @@ -481,6 +594,8 @@ static int set_sctp(struct sk_buff *skb,
>>               sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
>>
>>               skb_clear_hash(skb);
>> +             flow_key_set_tp_src(skb, sctp_port_key->sctp_src);
>> +             flow_key_set_tp_dst(skb, sctp_port_key->sctp_dst);
>>       }
>>
>>       return 0;
>> @@ -603,10 +718,12 @@ static int execute_set_action(struct sk_buff *skb,
>>       switch (nla_type(nested_attr)) {
>>       case OVS_KEY_ATTR_PRIORITY:
>>               skb->priority = nla_get_u32(nested_attr);
>> +             flow_key_set_priority(skb, skb->priority);
>>               break;
>>
>>       case OVS_KEY_ATTR_SKB_MARK:
>>               skb->mark = nla_get_u32(nested_attr);
>> +             flow_key_set_skb_mark(skb, skb->mark);
>>               break;
>>
>>       case OVS_KEY_ATTR_TUNNEL_INFO:
>> @@ -646,20 +763,40 @@ static int execute_set_action(struct sk_buff *skb,
>> }
>>
>> static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
>> -                       const struct nlattr *a)
>> +                       const struct nlattr *a, int rem)
>> {
>>       struct sw_flow_key recirc_key;
>>       int err;
>>
>> -     err = ovs_flow_key_extract_recirc(nla_get_u32(a), OVS_CB(skb)->pkt_key,
>> -                                       skb, &recirc_key);
>> -     if (err) {
>> -             kfree_skb(skb);
>> -             return err;
>> +     if (!last_action(a, rem)) {
>> +             /* Recirc action is the not the last action
>> +              * of the action list. */
>> +             skb = skb_clone(skb, GFP_ATOMIC);
>> +
>> +             /* Skip the recirc action when out of memory, but
>> +              * continue on with the rest of the action list. */
>> +             if (!skb)
>> +                     return 0;
>>       }
>>
>> -     ovs_dp_process_packet(skb, true);
>> +     if (is_skb_flow_key_valid(skb)) {
>> +             if (!last_action(a, rem)) {
>> +                     recirc_key = *OVS_CB(skb)->pkt_key;
>> +                     OVS_CB(skb)->pkt_key = &recirc_key;
>> +             }
>> +             OVS_CB(skb)->pkt_key->recirc_id = nla_get_u32(a);
>> +     } else {
>> +             struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key;
>>
>> +             err = ovs_flow_key_extract_recirc(nla_get_u32(a), pkt_key,
>> +                                               skb, &recirc_key);
>> +             if (err) {
>> +                     kfree_skb(skb);
>> +                     return err;
>> +             }
>> +     }
>> +
>> +     ovs_dp_process_packet(skb, true);
>>       return 0;
>> }
>>
>> @@ -719,23 +856,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>>                       err = pop_vlan(skb);
>>                       break;
>>
>> -             case OVS_ACTION_ATTR_RECIRC: {
>> -                     struct sk_buff *recirc_skb;
>> -
>> -                     if (last_action(a, rem))
>> -                             return execute_recirc(dp, skb, a);
>> -
>> -                     /* Recirc action is the not the last action
>> -                      * of the action list. */
>> -                     recirc_skb = skb_clone(skb, GFP_ATOMIC);
>> -
>> -                     /* Skip the recirc action when out of memory, but
>> -                      * continue on with the rest of the action list. */
>> -                     if (recirc_skb)
>> -                             err = execute_recirc(dp, recirc_skb, a);
>> -
>> +             case OVS_ACTION_ATTR_RECIRC:
>> +                     err = execute_recirc(dp, skb, a, rem);
>>                       break;
>> -             }
>>
>>               case OVS_ACTION_ATTR_SET:
>>                       err = execute_set_action(skb, nla_data(a));
>> --
>> 1.9.3
>>
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev
>



More information about the dev mailing list