[ovs-dev] [PATCH 1/2] Avoid packet drop on LACP bond after link up
Nitin Katiyar
nitin.katiyar at ericsson.com
Wed Feb 27 08:37:36 UTC 2019
Problem:
========
During port DOWN->UP of link (slave) in a LACP bond, after receiving the
LACPDU with SYNC set for both actor and partner, the bond-slave remains
"disabled" until OVS main thread runs LACP state machine and eventually
"enables" the bond-slave. With this, we have observed delays in the order
of 350ms and packets are dropped in OVS due to bond-admissibility check
(packets received on slave in "disabled" state are dropped).
Fix:
====
When a LACPDU is received, evaluate whether LACP slave can be enabled
(slave_may_enable()) and set LACP slave's may_enable from the datapath
thread itself. When may_enable = TRUE, it means L1 state is UP and
LACP-SYNC is done and it is waiting for the main thread to enable the
slave. Relax the check in bond_check_admissibility() to check for both
"enable" and "may_enable" of the LACP slave. This would avoid dropping
of packets until the main thread enables the slave from bundle_run().
Signed-off-by: Nitin Katiyar <nitin.katiyar at ericsson.com>
Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc at gmail.com>
Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc at gmail.com>
---
lib/lacp.c | 12 ++++++++++--
lib/lacp.h | 3 ++-
ofproto/bond.c | 16 +++++++++++++---
ofproto/ofproto-dpif-xlate.c | 11 ++++++++++-
4 files changed, 35 insertions(+), 7 deletions(-)
diff --git a/lib/lacp.c b/lib/lacp.c
index d6b36aa..7ac85f9 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -154,6 +154,7 @@ static struct slave *slave_lookup(const struct lacp *, const void *slave)
OVS_REQUIRES(mutex);
static bool info_tx_equal(struct lacp_info *, struct lacp_info *)
OVS_REQUIRES(mutex);
+static bool slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex);
static unixctl_cb_func lacp_unixctl_show;
static unixctl_cb_func lacp_unixctl_show_stats;
@@ -324,8 +325,8 @@ lacp_is_active(const struct lacp *lacp) OVS_EXCLUDED(mutex)
/* Processes 'packet' which was received on 'slave_'. This function should be
* called on all packets received on 'slave_' with Ethernet Type ETH_TYPE_LACP.
*/
-void
-lacp_process_packet(struct lacp *lacp, const void *slave_,
+bool
+lacp_process_packet(struct lacp *lacp, const void *bond, const void *slave_,
const struct dp_packet *packet)
OVS_EXCLUDED(mutex)
{
@@ -333,6 +334,7 @@ lacp_process_packet(struct lacp *lacp, const void *slave_,
const struct lacp_pdu *pdu;
long long int tx_rate;
struct slave *slave;
+ bool lacp_may_enable = false;
lacp_lock();
slave = slave_lookup(lacp, slave_);
@@ -362,8 +364,14 @@ lacp_process_packet(struct lacp *lacp, const void *slave_,
slave->partner = pdu->actor;
}
+ /* Evaluate may_enable here to avoid dropping of packets till main thread
+ * sets may_enable to true. */
+ lacp_may_enable = slave_may_enable__(slave);
+
out:
lacp_unlock();
+
+ return lacp_may_enable;
}
/* Returns the lacp_status of the given 'lacp' object (which may be NULL). */
diff --git a/lib/lacp.h b/lib/lacp.h
index f35cff5..1505c2c 100644
--- a/lib/lacp.h
+++ b/lib/lacp.h
@@ -46,7 +46,8 @@ struct lacp *lacp_ref(const struct lacp *);
void lacp_configure(struct lacp *, const struct lacp_settings *);
bool lacp_is_active(const struct lacp *);
-void lacp_process_packet(struct lacp *, const void *slave,
+bool lacp_process_packet(struct lacp *, const void *bond,
+ const void *slave,
const struct dp_packet *packet);
enum lacp_status lacp_status(const struct lacp *);
diff --git a/ofproto/bond.c b/ofproto/bond.c
index d2a8b1f..cd6fd33 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -794,6 +794,7 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
{
enum bond_verdict verdict = BV_DROP;
struct bond_slave *slave;
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
ovs_rwlock_rdlock(&rwlock);
slave = bond_slave_lookup(bond, slave_);
@@ -811,7 +812,11 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
* drop all incoming traffic except if lacp_fallback_ab is enabled. */
switch (bond->lacp_status) {
case LACP_NEGOTIATED:
- verdict = slave->enabled ? BV_ACCEPT : BV_DROP;
+ /* To reduce packet-drops due to delay in enabling of slave (post
+ * LACP-SYNC), from main thread, check for may_enable as well.
+ * When may_enable is TRUE, it means LACP is UP and waiting for the
+ * main thread to run LACP state machine and enable the slave. */
+ verdict = (slave->enabled || slave->may_enable) ? BV_ACCEPT : BV_DROP;
goto out;
case LACP_CONFIGURED:
if (!bond->lacp_fallback_ab) {
@@ -847,8 +852,6 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
/* Drop all packets which arrive on backup slaves. This is similar to
* how Linux bonding handles active-backup bonds. */
if (bond->active_slave != slave) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-
VLOG_DBG_RL(&rl, "active-backup bond received packet on backup"
" slave (%s) destined for " ETH_ADDR_FMT,
slave->name, ETH_ADDR_ARGS(eth_dst));
@@ -870,6 +873,13 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
OVS_NOT_REACHED();
out:
+ if (slave && (verdict != BV_ACCEPT)) {
+ VLOG_DBG_RL(&rl, "slave= %s actv-slave= %d may_enable %d enable %d "
+ "LACP %d verdict(A/D/M=0/1/2) %d\n", slave->name,
+ (bond->active_slave == slave), slave->may_enable,
+ slave->enabled, bond->lacp_status, verdict);
+ }
+
ovs_rwlock_unlock(&rwlock);
return verdict;
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index acd4817..a4a3a2b 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3288,6 +3288,7 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport)
const struct xbridge *xbridge = ctx->xbridge;
const struct dp_packet *packet = ctx->xin->packet;
enum slow_path_reason slow;
+ bool lacp_may_enable;
if (!xport) {
slow = 0;
@@ -3308,7 +3309,15 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport)
} else if (xport->xbundle && xport->xbundle->lacp
&& flow->dl_type == htons(ETH_TYPE_LACP)) {
if (packet) {
- lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
+ lacp_may_enable = lacp_process_packet(xport->xbundle->lacp,
+ xport->xbundle->bond,
+ xport->ofport, packet);
+ /* Update LACP status in bond-slave to avoid packet-drops until
+ * LACP state machine is run by the main thread. */
+ if (xport->xbundle->bond && lacp_may_enable) {
+ bond_slave_set_may_enable(xport->xbundle->bond, xport->ofport,
+ lacp_may_enable);
+ }
}
slow = SLOW_LACP;
} else if ((xbridge->stp || xbridge->rstp) &&
--
1.9.1
More information about the dev
mailing list