[ovs-dev] [PATCH 1/2] tun_metadata: Fix coredump caused by use-after-free bug

Yifeng Sun pkusunyifeng at gmail.com
Tue Apr 7 21:18:59 UTC 2020


Thanks William for the review.

We need to use ovsrcu_postpone(tun_metadata_free, tab) because
tun_table is protected by RCU and we can only free tun_table when
all threads quiesce.

It turns out that this fix based on reference count is not beautiful, it
doesn't fit well with current RCU mechanism.

A previous commit seems fixing the samiliar issue:
254878c18874f6 (ofproto-dpif-xlate: Fix segmentation fault caused by
tun_table)

It is unsafe for frozen_state to reference tun_table because tun_table is
protected by RCU while the lifecycle of frozen_state can span several RCU
quiesce states.

As we discussed offline, we can simply nullify tun_table in frozen_state.
If frozen_state needs tun_table, the latest valid tun_table can be found
by ofproto_get_tun_tab() efficiently.

I will submit a new version.
Yifeng




On Mon, Apr 6, 2020 at 8:47 AM William Tu <u9012063 at gmail.com> wrote:
>
> On Thu, Mar 26, 2020 at 12:58:21PM -0700, Yifeng Sun wrote:
> > Tun_metadata can be referened by flow and frozen_state at the same
> > time. When ovs-vswitchd handles TLV table mod message, the involved
> > tun_metadata gets freed. The call trace to free tun_metadata is
> > shown as below:
> >
> > ofproto_run
> > - handle_openflow
> >   - handle_single_part_openflow
> >     - handle_tlv_table_mod
> >       - tun_metadata_table_mod
> >         - tun_metadata_postpone_free
> >
> > Unfortunately, this tun_metadata can be still used by some frozen_state,
> > and later on when frozen_state tries to access its tun_metadata table,
> > ovs-vswitchd crashes. The call trace to access tun_metadata from
> > frozen_state is shown as below:
> >
> > udpif_upcall_handler
> > - recv_upcalls
> >   - process_upcall
> >     - frozen_metadata_to_flow
> >
> > This patch fixes it by introducing a reference count to tun_metadata.
> > Whenever a pointer of tun_metadata is passed between flow and
> > frozen_state, we increase its reference count. Reference count
> > is decreased at deallocation.
> >
> > In present code, pointer of tun_metadata can be passed between flows.
> > It is safe because of RCU mechanism.
> >
> > VMware-BZ: #2526222
> > Signed-off-by: Yifeng Sun <pkusunyifeng at gmail.com>
> > ---
> >  lib/tun-metadata.c         | 29 ++++++++++++++++++++++++++++-
> >  lib/tun-metadata.h         |  2 ++
> >  ofproto/ofproto-dpif-rid.c |  8 ++++++++
> >  ofproto/ofproto-dpif-rid.h |  2 ++
> >  4 files changed, 40 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
> > index f8a0e19524e9..c4218a034a92 100644
> > --- a/lib/tun-metadata.c
> > +++ b/lib/tun-metadata.c
> > @@ -25,6 +25,7 @@
> >  #include "nx-match.h"
> >  #include "odp-netlink.h"
> >  #include "openvswitch/ofp-match.h"
> > +#include "ovs-atomic.h"
> >  #include "ovs-rcu.h"
> >  #include "packets.h"
> >  #include "tun-metadata.h"
> > @@ -40,6 +41,11 @@ struct tun_meta_entry {
> >  /* Maps from TLV option class+type to positions in a struct
tun_metadata's
> >   * 'opts' array.  */
> >  struct tun_table {
> > +     /* Struct tun_table can be referenced by struct frozen_state for
a long
> > +      * time. This ref_cnt protects tun_table from being freed if it
is still
> > +      * being used somewhere. */
> > +    struct ovs_refcount ref_cnt;
> > +
> >      /* TUN_METADATA<i> is stored in element <i>. */
> >      struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
> >
> > @@ -79,6 +85,24 @@ tun_key_type(uint32_t key)
> >      return key & 0xff;
> >  }
> >
> > +void
> > +tun_metadata_ref(const struct tun_table *tab)
> > +{
> > +    if (tab) {
> > +        ovs_refcount_ref(&CONST_CAST(struct tun_table *,
tab)->ref_cnt);
> > +    }
> > +}
> > +
> > +unsigned int
> > +tun_metadata_unref(const struct tun_table *tab)
> > +{
> > +    if (tab) {
> > +        return ovs_refcount_unref_relaxed(
> > +                &CONST_CAST(struct tun_table *, tab)->ref_cnt);
> > +    }
> > +    return -1;
> return -1 looks weird since it's unsigned int.
>
> > +}
> > +
> >  /* Returns a newly allocated tun_table.  If 'old_map' is nonnull then
the new
> >   * tun_table is a deep copy of the old one. */
> >  struct tun_table *
> > @@ -111,6 +135,7 @@ tun_metadata_alloc(const struct tun_table *old_map)
> >          hmap_init(&new_map->key_hmap);
> >      }
> >
> > +    ovs_refcount_init(&new_map->ref_cnt);
> >      return new_map;
> >  }
> >
> > @@ -135,7 +160,9 @@ tun_metadata_free(struct tun_table *map)
> >  void
> >  tun_metadata_postpone_free(struct tun_table *tab)
> >  {
> > -    ovsrcu_postpone(tun_metadata_free, tab);
> > +    if (tun_metadata_unref(tab) == 1) {
> > +        ovsrcu_postpone(tun_metadata_free, tab);
>
> If this is the last ref and "tun_metadata_unref(tab)== 1"
> why not just call tun_metadata_free(tab)?
>
> Regards,
> William
>
> > +    }
> >  }
> >
> >  enum ofperr
> > diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
> > index 7dad9504b8da..933021a0f679 100644
> > --- a/lib/tun-metadata.h
> > +++ b/lib/tun-metadata.h
> > @@ -33,6 +33,8 @@ struct ofputil_tlv_table_mod;
> >  struct ofputil_tlv_table_reply;
> >  struct tun_table;
> >
> > +void tun_metadata_ref(const struct tun_table *tab);
> > +unsigned int tun_metadata_unref(const struct tun_table *tab);
> >  struct tun_table *tun_metadata_alloc(const struct tun_table *old_map);
> >  void tun_metadata_free(struct tun_table *);
> >  void tun_metadata_postpone_free(struct tun_table *);
> > diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> > index 29aafc2c0b40..d479e53d9b2d 100644
> > --- a/ofproto/ofproto-dpif-rid.c
> > +++ b/ofproto/ofproto-dpif-rid.c
> > @@ -201,6 +201,7 @@ static void
> >  frozen_state_clone(struct frozen_state *new, const struct frozen_state
*old)
> >  {
> >      *new = *old;
> > +    tun_metadata_ref(old->metadata.tunnel.metadata.tab);
> >      new->stack = (new->stack_size
> >                    ? xmemdup(new->stack, new->stack_size)
> >                    : NULL);
> > @@ -218,10 +219,17 @@ frozen_state_clone(struct frozen_state *new,
const struct frozen_state *old)
> >  static void
> >  frozen_state_free(struct frozen_state *state)
> >  {
> > +    struct tun_table *tab;
> > +
> >      free(state->stack);
> >      free(state->ofpacts);
> >      free(state->action_set);
> >      free(state->userdata);
> > +
> > +    tab = CONST_CAST(struct tun_table *,
state->metadata.tunnel.metadata.tab);
> > +    if (tun_metadata_unref(tab) == 1) {
> > +        tun_metadata_free(tab);
> > +    }
> >  }
> >
> >  /* Allocate a unique recirculation id for the given set of flow
metadata.
> > diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
> > index e5d02caf28a3..1fefbf53b94a 100644
> > --- a/ofproto/ofproto-dpif-rid.h
> > +++ b/ofproto/ofproto-dpif-rid.h
> > @@ -115,6 +115,7 @@ frozen_metadata_from_flow(struct frozen_metadata
*md,
> >  {
> >      memset(md, 0, sizeof *md);
> >      md->tunnel = flow->tunnel;
> > +    tun_metadata_ref(flow->tunnel.metadata.tab);
> >      md->metadata = flow->metadata;
> >      memcpy(md->regs, flow->regs, sizeof md->regs);
> >      md->in_port = flow->in_port.ofp_port;
> > @@ -125,6 +126,7 @@ frozen_metadata_to_flow(const struct
frozen_metadata *md,
> >                          struct flow *flow)
> >  {
> >      flow->tunnel = md->tunnel;
> > +    tun_metadata_ref(md->tunnel.metadata.tab);
> >      flow->metadata = md->metadata;
> >      memcpy(flow->regs, md->regs, sizeof flow->regs);
> >      flow->in_port.ofp_port = md->in_port;
> > --
> > 2.7.4
> >
> > _______________________________________________
> > dev mailing list
> > dev at openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list