[ovs-dev] [PATCH 1/2] tun_metadata: Fix coredump caused by use-after-free bug
Yifeng Sun
pkusunyifeng at gmail.com
Tue Apr 7 21:18:59 UTC 2020
Thanks William for the review.
We need to use ovsrcu_postpone(tun_metadata_free, tab) because
tun_table is protected by RCU and we can only free tun_table when
all threads quiesce.
It turns out that this fix based on reference count is not beautiful, it
doesn't fit well with current RCU mechanism.
A previous commit seems fixing the samiliar issue:
254878c18874f6 (ofproto-dpif-xlate: Fix segmentation fault caused by
tun_table)
It is unsafe for frozen_state to reference tun_table because tun_table is
protected by RCU while the lifecycle of frozen_state can span several RCU
quiesce states.
As we discussed offline, we can simply nullify tun_table in frozen_state.
If frozen_state needs tun_table, the latest valid tun_table can be found
by ofproto_get_tun_tab() efficiently.
I will submit a new version.
Yifeng
On Mon, Apr 6, 2020 at 8:47 AM William Tu <u9012063 at gmail.com> wrote:
>
> On Thu, Mar 26, 2020 at 12:58:21PM -0700, Yifeng Sun wrote:
> > Tun_metadata can be referened by flow and frozen_state at the same
> > time. When ovs-vswitchd handles TLV table mod message, the involved
> > tun_metadata gets freed. The call trace to free tun_metadata is
> > shown as below:
> >
> > ofproto_run
> > - handle_openflow
> > - handle_single_part_openflow
> > - handle_tlv_table_mod
> > - tun_metadata_table_mod
> > - tun_metadata_postpone_free
> >
> > Unfortunately, this tun_metadata can be still used by some frozen_state,
> > and later on when frozen_state tries to access its tun_metadata table,
> > ovs-vswitchd crashes. The call trace to access tun_metadata from
> > frozen_state is shown as below:
> >
> > udpif_upcall_handler
> > - recv_upcalls
> > - process_upcall
> > - frozen_metadata_to_flow
> >
> > This patch fixes it by introducing a reference count to tun_metadata.
> > Whenever a pointer of tun_metadata is passed between flow and
> > frozen_state, we increase its reference count. Reference count
> > is decreased at deallocation.
> >
> > In present code, pointer of tun_metadata can be passed between flows.
> > It is safe because of RCU mechanism.
> >
> > VMware-BZ: #2526222
> > Signed-off-by: Yifeng Sun <pkusunyifeng at gmail.com>
> > ---
> > lib/tun-metadata.c | 29 ++++++++++++++++++++++++++++-
> > lib/tun-metadata.h | 2 ++
> > ofproto/ofproto-dpif-rid.c | 8 ++++++++
> > ofproto/ofproto-dpif-rid.h | 2 ++
> > 4 files changed, 40 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
> > index f8a0e19524e9..c4218a034a92 100644
> > --- a/lib/tun-metadata.c
> > +++ b/lib/tun-metadata.c
> > @@ -25,6 +25,7 @@
> > #include "nx-match.h"
> > #include "odp-netlink.h"
> > #include "openvswitch/ofp-match.h"
> > +#include "ovs-atomic.h"
> > #include "ovs-rcu.h"
> > #include "packets.h"
> > #include "tun-metadata.h"
> > @@ -40,6 +41,11 @@ struct tun_meta_entry {
> > /* Maps from TLV option class+type to positions in a struct
tun_metadata's
> > * 'opts' array. */
> > struct tun_table {
> > + /* Struct tun_table can be referenced by struct frozen_state for
a long
> > + * time. This ref_cnt protects tun_table from being freed if it
is still
> > + * being used somewhere. */
> > + struct ovs_refcount ref_cnt;
> > +
> > /* TUN_METADATA<i> is stored in element <i>. */
> > struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
> >
> > @@ -79,6 +85,24 @@ tun_key_type(uint32_t key)
> > return key & 0xff;
> > }
> >
> > +void
> > +tun_metadata_ref(const struct tun_table *tab)
> > +{
> > + if (tab) {
> > + ovs_refcount_ref(&CONST_CAST(struct tun_table *,
tab)->ref_cnt);
> > + }
> > +}
> > +
> > +unsigned int
> > +tun_metadata_unref(const struct tun_table *tab)
> > +{
> > + if (tab) {
> > + return ovs_refcount_unref_relaxed(
> > + &CONST_CAST(struct tun_table *, tab)->ref_cnt);
> > + }
> > + return -1;
> return -1 looks weird since it's unsigned int.
>
> > +}
> > +
> > /* Returns a newly allocated tun_table. If 'old_map' is nonnull then
the new
> > * tun_table is a deep copy of the old one. */
> > struct tun_table *
> > @@ -111,6 +135,7 @@ tun_metadata_alloc(const struct tun_table *old_map)
> > hmap_init(&new_map->key_hmap);
> > }
> >
> > + ovs_refcount_init(&new_map->ref_cnt);
> > return new_map;
> > }
> >
> > @@ -135,7 +160,9 @@ tun_metadata_free(struct tun_table *map)
> > void
> > tun_metadata_postpone_free(struct tun_table *tab)
> > {
> > - ovsrcu_postpone(tun_metadata_free, tab);
> > + if (tun_metadata_unref(tab) == 1) {
> > + ovsrcu_postpone(tun_metadata_free, tab);
>
> If this is the last ref and "tun_metadata_unref(tab)== 1"
> why not just call tun_metadata_free(tab)?
>
> Regards,
> William
>
> > + }
> > }
> >
> > enum ofperr
> > diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
> > index 7dad9504b8da..933021a0f679 100644
> > --- a/lib/tun-metadata.h
> > +++ b/lib/tun-metadata.h
> > @@ -33,6 +33,8 @@ struct ofputil_tlv_table_mod;
> > struct ofputil_tlv_table_reply;
> > struct tun_table;
> >
> > +void tun_metadata_ref(const struct tun_table *tab);
> > +unsigned int tun_metadata_unref(const struct tun_table *tab);
> > struct tun_table *tun_metadata_alloc(const struct tun_table *old_map);
> > void tun_metadata_free(struct tun_table *);
> > void tun_metadata_postpone_free(struct tun_table *);
> > diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> > index 29aafc2c0b40..d479e53d9b2d 100644
> > --- a/ofproto/ofproto-dpif-rid.c
> > +++ b/ofproto/ofproto-dpif-rid.c
> > @@ -201,6 +201,7 @@ static void
> > frozen_state_clone(struct frozen_state *new, const struct frozen_state
*old)
> > {
> > *new = *old;
> > + tun_metadata_ref(old->metadata.tunnel.metadata.tab);
> > new->stack = (new->stack_size
> > ? xmemdup(new->stack, new->stack_size)
> > : NULL);
> > @@ -218,10 +219,17 @@ frozen_state_clone(struct frozen_state *new,
const struct frozen_state *old)
> > static void
> > frozen_state_free(struct frozen_state *state)
> > {
> > + struct tun_table *tab;
> > +
> > free(state->stack);
> > free(state->ofpacts);
> > free(state->action_set);
> > free(state->userdata);
> > +
> > + tab = CONST_CAST(struct tun_table *,
state->metadata.tunnel.metadata.tab);
> > + if (tun_metadata_unref(tab) == 1) {
> > + tun_metadata_free(tab);
> > + }
> > }
> >
> > /* Allocate a unique recirculation id for the given set of flow
metadata.
> > diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
> > index e5d02caf28a3..1fefbf53b94a 100644
> > --- a/ofproto/ofproto-dpif-rid.h
> > +++ b/ofproto/ofproto-dpif-rid.h
> > @@ -115,6 +115,7 @@ frozen_metadata_from_flow(struct frozen_metadata
*md,
> > {
> > memset(md, 0, sizeof *md);
> > md->tunnel = flow->tunnel;
> > + tun_metadata_ref(flow->tunnel.metadata.tab);
> > md->metadata = flow->metadata;
> > memcpy(md->regs, flow->regs, sizeof md->regs);
> > md->in_port = flow->in_port.ofp_port;
> > @@ -125,6 +126,7 @@ frozen_metadata_to_flow(const struct
frozen_metadata *md,
> > struct flow *flow)
> > {
> > flow->tunnel = md->tunnel;
> > + tun_metadata_ref(md->tunnel.metadata.tab);
> > flow->metadata = md->metadata;
> > memcpy(flow->regs, md->regs, sizeof flow->regs);
> > flow->in_port.ofp_port = md->in_port;
> > --
> > 2.7.4
> >
> > _______________________________________________
> > dev mailing list
> > dev at openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
More information about the dev
mailing list