[ovs-dev] [recirc fix branch-2.3 V2] recirculation: Map recirc_id to ofproto_dpif.
Alex Wang
alexw at nicira.com
Tue Dec 23 18:59:24 UTC 2014
Got an offline ACK from Ethan,
Applied to patch to branch-2.3
On Tue, Dec 23, 2014 at 10:42 AM, Alex Wang <alexw at nicira.com> wrote:
> After commit 0c7812e5e (recirculation: Do not drop packet when
> there is no match from internal table.), if flow keys are modified
> before the recirculation action (e.g. set vlan ID), the miss
> handling of recirc'ed packets may not reach the intended
> 'ofproto_dpif' which has rules looking up the 'recirc_id's,
> causing drops.
>
> This commit adds an unittest that captures this bug. Moreover,
> to solve this bug, this commit checks mapping between 'recirc_id'
> and the corresponding 'ofproto_dpif', and makes sure that the
> miss handling of recirc'ed packets are done with the correct
> 'ofproto_dpif'.
>
> Signed-off-by: Alex Wang <alexw at nicira.com>
> Acked-by: Andy Zhou <azhou at nicira.com>
> Acked-by: Jarno Rajahalme <jrajahalme at nicira.com>
>
> ---
> PATCH->V2:
> - ovsrcu postpone the free of dpif_backer_recirc_node.
> - fix unittest interminttent failure.
>
> Diff between backport & master:
> - move + adjust the fix logic in xlate_lookup() (on master) to
> xlate_receive().
> - change the cmap (for mapping between 'recirc_id' and 'ofproto') to hmap,
> and protect the lookup with mutex.
> - unittest adjust to work with branch-2.3.
> ---
> ofproto/ofproto-dpif-xlate.c | 45 ++++++++++++++++++++--
> ofproto/ofproto-dpif.c | 85
> ++++++++++++++++++++++++++++++++++++++++--
> ofproto/ofproto-dpif.h | 12 ++++++
> tests/ofproto-dpif.at | 68 +++++++++++++++++++++++++++++++++
> 4 files changed, 203 insertions(+), 7 deletions(-)
>
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 0fc5443..ee353e3 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -634,6 +634,8 @@ xlate_receive(const struct dpif_backer *backer, struct
> ofpbuf *packet,
> struct dpif_sflow **sflow, struct netflow **netflow,
> odp_port_t *odp_in_port)
> {
> + struct ofproto_dpif *recv_ofproto = NULL;
> + struct ofproto_dpif *recirc_ofproto = NULL;
> const struct xport *xport;
> int error = ENODEV;
>
> @@ -655,6 +657,7 @@ xlate_receive(const struct dpif_backer *backer, struct
> ofpbuf *packet,
> if (!xport) {
> goto exit;
> }
> + recv_ofproto = xport->xbridge->ofproto;
>
> if (vsp_adjust_flow(xport->xbridge->ofproto, flow)) {
> if (packet) {
> @@ -667,20 +670,54 @@ xlate_receive(const struct dpif_backer *backer,
> struct ofpbuf *packet,
> }
> error = 0;
>
> + /* When recirc_id is set in 'flow', checks whether the ofproto_dpif
> that
> + * corresponds to the recirc_id is same as the receiving bridge. If
> they
> + * are the same, uses the 'recv_ofproto' and keeps the 'ofp_in_port'
> as
> + * assigned. Otherwise, uses the 'recirc_ofproto' that owns
> recirc_id and
> + * assigns OFPP_NONE to 'ofp_in_port'. Doing this is in that, the
> + * recirculated flow must be processced by the ofproto which
> originates
> + * the recirculation, and as bridges can only see their own ports, the
> + * in_port of the 'recv_ofproto' should not be passed to the
> + * 'recirc_ofproto'.
> + *
> + * Admittedly, setting the 'ofp_in_port' to OFPP_NONE limits the
> + * 'recirc_ofproto' from meaningfully matching on in_port of
> recirculated
> + * flow, and should be fixed in the near future.
> + *
> + * TODO: Restore the original patch port.
> + */
> + if (flow->recirc_id) {
> + recirc_ofproto = ofproto_dpif_recirc_get_ofproto(backer,
> + flow->recirc_id);
> + /* Returns error if could not find recirculation bridge */
> + if (!recirc_ofproto) {
> + error = ENOENT;
> + goto exit;
> + }
> +
> + if (recv_ofproto != recirc_ofproto) {
> + xport = NULL;
> + flow->in_port.ofp_port = OFPP_NONE;
> + if (odp_in_port) {
> + *odp_in_port = ODPP_NONE;
> + }
> + }
> + }
> +
> if (ofproto) {
> - *ofproto = xport->xbridge->ofproto;
> + *ofproto = xport ? recv_ofproto : recirc_ofproto;
> }
>
> if (ipfix) {
> - *ipfix = dpif_ipfix_ref(xport->xbridge->ipfix);
> + *ipfix = xport ? dpif_ipfix_ref(xport->xbridge->ipfix) : NULL;
> }
>
> if (sflow) {
> - *sflow = dpif_sflow_ref(xport->xbridge->sflow);
> + *sflow = xport ? dpif_sflow_ref(xport->xbridge->sflow) : NULL;
> }
>
> if (netflow) {
> - *netflow = netflow_ref(xport->xbridge->netflow);
> + *netflow = xport ? netflow_ref(xport->xbridge->netflow) : NULL;
> }
>
> exit:
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 7204ccf..6df6b83 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -57,6 +57,7 @@
> #include "ofproto-dpif-sflow.h"
> #include "ofproto-dpif-upcall.h"
> #include "ofproto-dpif-xlate.h"
> +#include "ovs-rcu.h"
> #include "poll-loop.h"
> #include "seq.h"
> #include "simap.h"
> @@ -238,6 +239,13 @@ COVERAGE_DEFINE(rev_port_toggled);
> COVERAGE_DEFINE(rev_flow_table);
> COVERAGE_DEFINE(rev_mac_learning);
>
> +/* Stores mapping between 'recirc_id' and 'ofproto-dpif'. */
> +struct dpif_backer_recirc_node {
> + struct hmap_node hmap_node;
> + struct ofproto_dpif *ofproto;
> + uint32_t recirc_id;
> +};
> +
> /* All datapaths of a given type share a single dpif backer instance. */
> struct dpif_backer {
> char *type;
> @@ -256,6 +264,8 @@ struct dpif_backer {
>
> /* Recirculation. */
> struct recirc_id_pool *rid_pool; /* Recirculation ID pool. */
> + struct hmap recirc_map; /* Map of 'recirc_id's to 'ofproto's.
> */
> + struct ovs_mutex recirc_mutex; /* Protects 'recirc_map'. */
> bool enable_recirc; /* True if the datapath supports recirculation
> */
>
> /* True if the datapath supports variable-length
> @@ -794,6 +804,30 @@ dealloc(struct ofproto *ofproto_)
> free(ofproto);
> }
>
> +/* Called when 'ofproto' is destructed. Checks for and clears any
> + * recirc_id leak. */
> +static void
> +dpif_backer_recirc_clear_ofproto(struct dpif_backer *backer,
> + struct ofproto_dpif *ofproto)
> +{
> + struct dpif_backer_recirc_node *node;
> +
> + ovs_mutex_lock(&backer->recirc_mutex);
> + HMAP_FOR_EACH (node, hmap_node, &backer->recirc_map) {
> + if (node->ofproto == ofproto) {
> + VLOG_ERR("recirc_id %"PRIu32", not freed when ofproto (%s) "
> + "is destructed", node->recirc_id, ofproto->up.name);
> + hmap_remove(&backer->recirc_map, &node->hmap_node);
> + /* Does not matter whether directly free or use
> ovsrcu_postpone,
> + * since all datapath flows are already purged before calling
> this
> + * function, and no 'recirc_id' could be associated to
> 'ofproto'.
> + */
> + ovsrcu_postpone(free, node);
> + }
> + }
> + ovs_mutex_unlock(&backer->recirc_mutex);
> +}
> +
> static void
> close_dpif_backer(struct dpif_backer *backer)
> {
> @@ -810,6 +844,8 @@ close_dpif_backer(struct dpif_backer *backer)
> hmap_destroy(&backer->odp_to_ofport_map);
> shash_find_and_delete(&all_dpif_backers, backer->type);
> recirc_id_pool_destroy(backer->rid_pool);
> + hmap_destroy(&backer->recirc_map);
> + ovs_mutex_destroy(&backer->recirc_mutex);
> free(backer->type);
> dpif_close(backer->dpif);
> free(backer);
> @@ -921,6 +957,8 @@ open_dpif_backer(const char *type, struct dpif_backer
> **backerp)
> backer->variable_length_userdata =
> check_variable_length_userdata(backer);
> backer->max_mpls_depth = check_max_mpls_depth(backer);
> backer->rid_pool = recirc_id_pool_create();
> + ovs_mutex_init(&backer->recirc_mutex);
> + hmap_init(&backer->recirc_map);
>
> error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
> if (error) {
> @@ -1306,6 +1344,8 @@ destruct(struct ofproto *ofproto_)
> }
> guarded_list_destroy(&ofproto->pins);
>
> + dpif_backer_recirc_clear_ofproto(ofproto->backer, ofproto);
> +
> mbridge_unref(ofproto->mbridge);
>
> netflow_unref(ofproto->netflow);
> @@ -4758,20 +4798,59 @@ odp_port_to_ofp_port(const struct ofproto_dpif
> *ofproto, odp_port_t odp_port)
> }
> }
>
> +struct ofproto_dpif *
> +ofproto_dpif_recirc_get_ofproto(const struct dpif_backer *backer,
> + uint32_t recirc_id)
> +{
> + struct dpif_backer_recirc_node *node;
> +
> + ovs_mutex_lock(&backer->recirc_mutex);
> + node = CONTAINER_OF(hmap_first_with_hash(&backer->recirc_map,
> recirc_id),
> + struct dpif_backer_recirc_node, hmap_node);
> + ovs_mutex_unlock(&backer->recirc_mutex);
> +
> + return node ? node->ofproto : NULL;
> +}
> +
> uint32_t
> ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto)
> {
> struct dpif_backer *backer = ofproto->backer;
> + uint32_t recirc_id = recirc_id_alloc(backer->rid_pool);
> +
> + if (recirc_id) {
> + struct dpif_backer_recirc_node *node = xmalloc(sizeof *node);
> +
> + node->recirc_id = recirc_id;
> + node->ofproto = ofproto;
>
> - return recirc_id_alloc(backer->rid_pool);
> + ovs_mutex_lock(&backer->recirc_mutex);
> + hmap_insert(&backer->recirc_map, &node->hmap_node,
> node->recirc_id);
> + ovs_mutex_unlock(&backer->recirc_mutex);
> + }
> +
> + return recirc_id;
> }
>
> void
> ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t
> recirc_id)
> {
> struct dpif_backer *backer = ofproto->backer;
> -
> - recirc_id_free(backer->rid_pool, recirc_id);
> + struct dpif_backer_recirc_node *node;
> +
> + ovs_mutex_lock(&backer->recirc_mutex);
> + node = CONTAINER_OF(hmap_first_with_hash(&backer->recirc_map,
> recirc_id),
> + struct dpif_backer_recirc_node, hmap_node);
> + if (node) {
> + hmap_remove(&backer->recirc_map, &node->hmap_node);
> + ovs_mutex_unlock(&backer->recirc_mutex);
> + recirc_id_free(backer->rid_pool, node->recirc_id);
> + /* RCU postpone the free, since other threads may be referring
> + * to 'node' at same time. */
> + ovsrcu_postpone(free, node);
> + } else {
> + ovs_mutex_unlock(&backer->recirc_mutex);
> + }
> }
>
> int
> diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
> index 6f77b1a..cee4723 100644
> --- a/ofproto/ofproto-dpif.h
> +++ b/ofproto/ofproto-dpif.h
> @@ -226,8 +226,20 @@ struct ofport_dpif *odp_port_to_ofport(const struct
> dpif_backer *, odp_port_t);
> * Post recirculation data path flows are managed like other data path
> flows.
> * They are created on demand. Miss handling, stats collection and
> revalidation
> * work the same way as regular flows.
> + *
> + * If the bridge which originates the recirculation is different from the
> bridge
> + * that receives the post recirculation packet (e.g. when patch port is
> used),
> + * the packet will be processed directly by the recirculation bridge with
> + * in_port set to OFPP_NONE. Admittedly, doing this limits the
> recirculation
> + * bridge from matching on in_port of post recirculation packets, and
> will be
> + * fixed in the near future.
> + *
> + * TODO: Always restore the correct in_port.
> + *
> */
>
> +struct ofproto_dpif *ofproto_dpif_recirc_get_ofproto(const struct
> dpif_backer *ofproto,
> + uint32_t recirc_id);
> uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto);
> void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t
> recirc_id);
> int ofproto_dpif_add_internal_flow(struct ofproto_dpif *,
> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
> index 1fcd937..6fb0dce 100644
> --- a/tests/ofproto-dpif.at
> +++ b/tests/ofproto-dpif.at
> @@ -195,6 +195,74 @@ AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp >
> br1_flows.txt])
> AT_CHECK([test `grep in_port.4 br1_flows.txt |wc -l` -gt 24])
> AT_CHECK([test `grep in_port.5 br1_flows.txt |wc -l` -gt 24])
> AT_CHECK([test `grep in_port.6 br1_flows.txt |wc -l` -gt 24])
> +
> +OVS_VSWITCHD_STOP()
> +AT_CLEANUP
> +
> +# Makes sure recirculation does not change the way packet is handled.
> +AT_SETUP([ofproto-dpif, balance-tcp bonding, different recirc flow ])
> +OVS_VSWITCHD_START(
> + [add-bond br0 bond0 p1 p2 bond_mode=balance-tcp lacp=active \
> + other-config:lacp-time=fast other-config:bond-rebalance-interval=0
> -- \
> + set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock
> ofport_request=1 -- \
> + set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock
> ofport_request=2 -- \
> + add-br br1 -- \
> + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
> + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
> + fail-mode=standalone -- \
> + add-bond br1 bond1 p3 p4 bond_mode=balance-tcp lacp=active \
> + other-config:lacp-time=fast other-config:bond-rebalance-interval=0
> -- \
> + set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock
> ofport_request=3 -- \
> + set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock
> ofport_request=4 -- \
> + add-port br1 br1- -- set interface br1- type=patch options:peer=br1+
> ofport_request=100 -- \
> + add-br br-int -- \
> + set bridge br-int other-config:hwaddr=aa:77:aa:77:00:00 -- \
> + set bridge br-int datapath-type=dummy other-config:datapath-id=1235 \
> + fail-mode=secure -- \
> + add-port br-int br1+ -- set interface br1+ type=patch
> options:peer=br1- ofport_request=101 -- \
> + add-port br-int p5 -- set interface p5 ofport_request=5 type=dummy
> +])
> +AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK
> +])
> +
> +# Waits for all ifaces enabled.
> +OVS_WAIT_UNTIL([test `ovs-appctl bond/show | grep -- "may_enable: true"
> | wc -l` -ge 4])
> +
> +# The dl_vlan flow should not be ever matched,
> +# since recirculation should not change the flow handling.
> +AT_DATA([flows.txt], [dnl
> +table=0 priority=1 in_port=5 actions=mod_vlan_vid:1,output(101)
> +table=0 priority=2 in_port=5 dl_vlan=1 actions=drop
> +])
> +AT_CHECK([ovs-ofctl add-flows br-int flows.txt])
> +
> +# Sends a packet to trigger recirculation.
> +# Should generate recirc_id(0x12d),dp_hash(0xcf/0xff).
> +AT_CHECK([ovs-appctl netdev-dummy/receive p5
> "in_port(5),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no)"])
> +
> +ovs-appctl time/warp 5000 100
> +
> +# Forces revalidators to update all stats.
> +AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [dnl
> +megaflows disabled
> +])
> +AT_CHECK([ovs-appctl upcall/enable-megaflows], [0], [dnl
> +megaflows enabled
> +])
> +
> +# Checks the flow stats in br1, should only be one flow with non-zero
> +# 'n_packets' from internal table.
> +AT_CHECK([ovs-appctl bridge/dump-flows br1 | ofctl_strip | grep --
> "n_packets" | grep -- "table_id" | sed -e 's/output:[[0-9]]\+/output/'],
> [0], [dnl
> +table_id=254, n_packets=1, n_bytes=64,
> priority=20,recirc_id=0x12d,dp_hash=0xcf/0xff,actions=output
> +])
> +
> +# Checks the flow stats in br-int, should be only one match.
> +AT_CHECK([ovs-ofctl dump-flows br-int | ofctl_strip | sort], [0], [dnl
> + n_packets=1, n_bytes=60, priority=1,in_port=5
> actions=mod_vlan_vid:1,output:101
> + priority=2,in_port=5,dl_vlan=1 actions=drop
> +NXST_FLOW reply:
> +])
> +
> OVS_VSWITCHD_STOP()
> AT_CLEANUP
>
> --
> 1.7.9.5
>
>
More information about the dev
mailing list