[ovs-dev] [PATCH v2 2/2] Don't send or receive LACP PDUs when carrier state of slave is down
Nitin Katiyar
nitin.katiyar at ericsson.com
Sun Jun 2 16:14:34 UTC 2019
Problem:
========
On certain Fortville NICs it has been observed that PHY UP detection can
get delayed (sometimes up to 4-5 secs). When the driver fails to fetch
PHY status as UP even though its actually UP, LACP packets can get
exchanged and LACP slave brought UP. In such a case, the remote end
would start sending traffic on that slave, but OVS drops it, as the
bond-slave is not yet enabled due to PHY DOWN.
Fix:
====
The main intention here is delay LACP negotiation until carrier (PHY)
status is read as UP.
1. When carrier state is DOWN, do not send any LACP PDUs and
drop any received LACP PDUs.
2. When LACP PDU is received, trigger re-checking
of carrier-state (in port_run()) by incrementing the connectivity
sequence number to find out the true carrier state as fast as
possible.
Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc at gmail.com>
Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc at gmail.com>
Signed-off-by: Nitin Katiyar <nitin.katiyar at ericsson.com>
---
lib/lacp.c | 35 ++++++++++++++++++++++++++++++-----
lib/lacp.h | 3 ++-
ofproto/ofproto-dpif.c | 14 +++++++-------
3 files changed, 39 insertions(+), 13 deletions(-)
diff --git a/lib/lacp.c b/lib/lacp.c
index e768012..16c823b 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -122,6 +122,7 @@ struct slave {
enum slave_status status; /* Slave status. */
bool attached; /* Attached. Traffic may flow. */
+ bool carrier_up; /* Carrier state of link. */
struct lacp_info partner; /* Partner information. */
struct lacp_info ntt_actor; /* Used to decide if we Need To Transmit. */
struct timer tx; /* Next message transmission timer. */
@@ -350,6 +351,16 @@ lacp_process_packet(struct lacp *lacp, const void *slave_,
goto out;
}
+ /* On some NICs L1 state reporting is slow. In case LACP packets are
+ * received while carrier (L1) state is still down, drop the LACP PDU and
+ * trigger re-checking of L1 state. */
+ if (!slave->carrier_up) {
+ VLOG_INFO_RL(&rl, "%s: carrier state is DOWN,"
+ " dropping received LACP PDU.", slave->name);
+ seq_change(connectivity_seq_get());
+ goto out;
+ }
+
slave->status = LACP_CURRENT;
tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX;
timer_set_duration(&slave->rx, LACP_RX_MULTIPLIER * tx_rate);
@@ -456,7 +467,8 @@ lacp_slave_unregister(struct lacp *lacp, const void *slave_)
/* This function should be called whenever the carrier status of 'slave_' has
* changed. If 'lacp' is null, this function has no effect.*/
void
-lacp_slave_carrier_changed(const struct lacp *lacp, const void *slave_)
+lacp_slave_carrier_changed(const struct lacp *lacp, const void *slave_,
+ bool carrier_up)
OVS_EXCLUDED(mutex)
{
struct slave *slave;
@@ -473,7 +485,11 @@ lacp_slave_carrier_changed(const struct lacp *lacp, const void *slave_)
if (slave->status == LACP_CURRENT || slave->lacp->active) {
slave_set_expired(slave);
}
- slave->count_carrier_changed++;
+
+ if (slave->carrier_up != carrier_up) {
+ slave->carrier_up = carrier_up;
+ slave->count_carrier_changed++;
+ }
out:
lacp_unlock();
@@ -498,11 +514,18 @@ lacp_slave_may_enable(const struct lacp *lacp, const void *slave_)
{
if (lacp) {
struct slave *slave;
- bool ret;
+ bool ret = false;
lacp_lock();
slave = slave_lookup(lacp, slave_);
- ret = slave ? slave_may_enable__(slave) : false;
+ if (slave) {
+ /* It is only called when carrier is up. So, enable slave's
+ * carrier state if it is currently down. */
+ if (!slave->carrier_up) {
+ slave->carrier_up = true;
+ }
+ ret = slave_may_enable__(slave);
+ }
lacp_unlock();
return ret;
} else {
@@ -820,7 +843,9 @@ slave_get_priority(struct slave *slave, struct lacp_info *priority)
static bool
slave_may_tx(const struct slave *slave) OVS_REQUIRES(mutex)
{
- return slave->lacp->active || slave->status != LACP_DEFAULTED;
+ /* Check for L1 state as well as LACP state. */
+ return (slave->carrier_up) && ((slave->lacp->active) ||
+ (slave->status != LACP_DEFAULTED));
}
static struct slave *
diff --git a/lib/lacp.h b/lib/lacp.h
index 0dfaef0..d731ae9 100644
--- a/lib/lacp.h
+++ b/lib/lacp.h
@@ -60,7 +60,8 @@ struct lacp_slave_settings {
void lacp_slave_register(struct lacp *, void *slave_,
const struct lacp_slave_settings *);
void lacp_slave_unregister(struct lacp *, const void *slave);
-void lacp_slave_carrier_changed(const struct lacp *, const void *slave);
+void lacp_slave_carrier_changed(const struct lacp *, const void *slave,
+ bool carrier_up);
bool lacp_slave_may_enable(const struct lacp *, const void *slave);
bool lacp_slave_is_current(const struct lacp *, const void *slave_);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index db461ac..cb6baf3 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3606,11 +3606,6 @@ ofport_update_peer(struct ofport_dpif *ofport)
static bool
may_enable_port(struct ofport_dpif *ofport)
{
- /* Carrier must be up. */
- if (!netdev_get_carrier(ofport->up.netdev)) {
- return false;
- }
-
/* If CFM or BFD is enabled, then at least one of them must report that the
* port is up. */
if ((ofport->bfd || ofport->cfm)
@@ -3636,12 +3631,17 @@ port_run(struct ofport_dpif *ofport)
{
long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
bool carrier_changed = carrier_seq != ofport->carrier_seq;
+ bool enable = netdev_get_carrier(ofport->up.netdev);
+
ofport->carrier_seq = carrier_seq;
if (carrier_changed && ofport->bundle) {
- lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
+ lacp_slave_carrier_changed(ofport->bundle->lacp, ofport, enable);
+ }
+
+ if (enable) {
+ enable = may_enable_port(ofport);
}
- bool enable = may_enable_port(ofport);
if (ofport->up.may_enable != enable) {
ofproto_port_set_enable(&ofport->up, enable);
--
1.9.1
More information about the dev
mailing list