[ovs-dev] [runt-flows 5/8] datapath: Report memory allocation errors in flow_extract().

Jesse Gross jesse at nicira.com
Thu Aug 26 21:56:04 UTC 2010


On Fri, Aug 13, 2010 at 10:55 AM, Ben Pfaff <blp at nicira.com> wrote:
> Until now flow_extract() has simply returned a bogus flow when memory
> allocation errors occurred.  This fixes the problem by propagating the
> error to the caller.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>

Hmm, that's pretty nasty behavior that we had before.

One general comment about this is that the behavior seems a little
asymmetrical for the different protocol types: why switch to this new
style for IP but not for ARP?  Why are the L4 protocols different from
L3?  Why can't we just do all of this at the beginning?  The answer to
all of these questions is (I assume) "IP options".  However, it took a
little more thought to convince myself that it is OK than seems
necessary.

> ---
>  datapath/datapath.c |   12 ++++++++++--
>  datapath/flow.c     |   49 ++++++++++++++++++++++++++++++++++++-------------
>  2 files changed, 46 insertions(+), 15 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index 8bf394f..44764aa 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -547,11 +547,17 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
>        struct sw_flow *flow;
>        struct sw_flow_actions *acts;
>        struct loop_counter *loop;
> +       int error;
>
>        OVS_CB(skb)->dp_port = p;
>
>        /* Extract flow from 'skb' into 'key'. */
> -       flow_extract(skb, p ? p->port_no : ODPP_NONE, &key);
> +       error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key);
> +       if (unlikely(error)) {
> +               kfree_skb(skb);
> +               return;
> +       }
> +
>        if (OVS_CB(skb)->is_frag && dp->drop_frags) {
>                kfree_skb(skb);
>                stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
> @@ -1351,7 +1357,9 @@ static int do_execute(struct datapath *dp, const struct odp_execute *execute)
>        else
>                skb->protocol = htons(ETH_P_802_2);
>
> -       flow_extract(skb, execute->in_port, &key);
> +       err = flow_extract(skb, execute->in_port, &key);
> +       if (err)
> +               goto error_free_skb;
>
>        rcu_read_lock();
>        err = execute_actions(dp, skb, &key, actions->actions,
> diff --git a/datapath/flow.c b/datapath/flow.c
> index 5e362e3..cd12d35 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -55,15 +55,27 @@ static inline bool arphdr_ok(struct sk_buff *skb)
>        return pskb_may_pull(skb, nh_ofs + sizeof(struct arp_eth_header));
>  }
>
> -static inline bool iphdr_ok(struct sk_buff *skb)
> +static inline int check_iphdr(struct sk_buff *skb)
>  {
> -       int nh_ofs = skb_network_offset(skb);
> -       if (skb->len >= nh_ofs + sizeof(struct iphdr)) {
> -               int ip_len = ip_hdrlen(skb);
> -               return (ip_len >= sizeof(struct iphdr)
> -                       && pskb_may_pull(skb, nh_ofs + ip_len));
> -       }
> -       return false;
> +       unsigned int nh_ofs = skb_network_offset(skb);
> +       unsigned int ip_len;
> +
> +       if (skb->len < nh_ofs + sizeof(struct iphdr))
> +               return -EINVAL;
> +
> +       ip_len = ip_hdrlen(skb);
> +       if (ip_len < sizeof(struct iphdr) || skb->len < nh_ofs + ip_len)
> +               return -EINVAL;
> +
> +       /*
> +        * Pull enough header bytes to account for the IP header plus the
> +        * longest transport header that we parse, currently 20 bytes for TCP.
> +        */
> +       if (!pskb_may_pull(skb, min(nh_ofs + ip_len + 20, skb->len)))
> +               return -ENOMEM;
> +
> +       skb_set_transport_header(skb, nh_ofs + ip_len);
> +       return 0;
>  }

My usual complaint - can you add unlikely() to these branches?

>
>  static inline bool tcphdr_ok(struct sk_buff *skb)
> @@ -228,6 +240,8 @@ static __be16 parse_ethertype(struct sk_buff *skb)
>  *
>  * The caller must ensure that skb->len >= ETH_HLEN.
>  *
> + * Returns 0 if successful, otherwise a negative errno value.
> + *
>  * Sets OVS_CB(skb)->is_frag to %true if @skb is an IPv4 fragment, otherwise to
>  * %false.
>  */
> @@ -242,7 +256,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
>        OVS_CB(skb)->is_frag = false;
>
>        if (!pskb_may_pull(skb, min(skb->len, 64u)))
> -               return 0;
> +               return -ENOMEM;
>
>        skb_reset_mac_header(skb);
>
> @@ -260,14 +274,23 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
>        __skb_push(skb, skb->data - (unsigned char *)eth);
>
>        /* Network layer. */
> -       if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
> -               struct iphdr *nh = ip_hdr(skb);
> -               int th_ofs = skb_network_offset(skb) + nh->ihl * 4;
> +       if (key->dl_type == htons(ETH_P_IP)) {
> +               struct iphdr *nh;
> +               int error;
> +
> +               error = check_iphdr(skb);
> +               if (unlikely(error == -ENOMEM))
> +                       return -ENOMEM;
> +               else if (unlikely(error == -EINVAL)) {
> +                       skb->transport_header = skb->network_header;
> +                       return 0;
> +               }

It makes me nervous to only check for two specific error types, even
if we currently only return those two.  Why not do something like
this:

if (unlikely(error)) {

}

> +
> +               nh = ip_hdr(skb);
>                key->nw_src = nh->saddr;
>                key->nw_dst = nh->daddr;
>                key->nw_tos = nh->tos & ~INET_ECN_MASK;
>                key->nw_proto = nh->protocol;
> -               skb_set_transport_header(skb, th_ofs);
>
>                /* Transport layer. */
>                if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) {
> --
> 1.7.1
>
>




More information about the dev mailing list