[ovs-dev] [PATCH v1 1/2] Fix packet drops on LACP bond after link up

Manohar Krishnappa Chidambaraswamy manohar.krishnappa.chidambaraswamy at ericsson.com
Tue Jun 5 10:36:40 UTC 2018


Hi,

Could someone take a look at this patch? We hit this issue (drops) on a customer setup and appreciate your comments/suggestions.

Thanx
Manu

On 17/05/18, 3:26 PM, "ovs-dev-bounces at openvswitch.org on behalf of Manohar Krishnappa Chidambaraswamy" <ovs-dev-bounces at openvswitch.org on behalf of manohar.krishnappa.chidambaraswamy at ericsson.com> wrote:

    1/2: Fix packet drops on LACP bond after link up
    
    Problem:
    ========
    During port DOWN->UP of link (slave) in a LACP bond, after receiving the
    LACPDU with SYNC set for both actor and partner, the bond-slave remains
    "disabled" until OVS main thread runs LACP state machine and eventually
    "enables" the bond-slave. With this, we have observed delays in the order
    of 350ms and packets are  dropped in OVS due to bond-admissibility check
    (packets received on slave in "disabled" state are dropped).
    
    Fix:
    ====
    When a LACPDU is received, evaluate whether LACP slave can be enabled
    (slave_may_enable()) and set LACP slave's may_enable from the datapath
    thread itself. When may_enable = TRUE, it means L1 state is UP and
    LACP-SYNC is done and it is waiting for the main thread to enable the
    slave. Relax the check in bond_check_admissibility() to check for both
    "enable" and "may_enable" of the LACP slave. This would avoid dropping
    of packets until the main thread enables the slave from bundle_run().
    
    Signed-off-by: Manohar K C
    <manohar.krishnappa.chidambaraswamy at ericsson.com>
    CC: Jan Scheurich <jan.scheurich at ericsson.com>
    CC: Nitin Katiyar <nitin.katiyar at ericsson.com>
    
    ---
    
    v1 2/2: Avoid LACP negotiation when NIC driver reports PHY status as DOWN.
    
    lib/lacp.c                   | 14 ++++++++++++--
    lib/lacp.h                   |  3 ++-
    ofproto/bond.c               | 18 +++++++++++++++---
    ofproto/ofproto-dpif-xlate.c | 13 ++++++++++++-
    4 files changed, 41 insertions(+), 7 deletions(-)
    
    diff --git a/lib/lacp.c b/lib/lacp.c
    index 8353746..8d97ad5 100644
    --- a/lib/lacp.c
    +++ b/lib/lacp.c
    @@ -154,6 +154,7 @@ static struct slave *slave_lookup(const struct lacp *, const void *slave)
         OVS_REQUIRES(mutex);
    static bool info_tx_equal(struct lacp_info *, struct lacp_info *)
         OVS_REQUIRES(mutex);
    +static bool slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex);
    static unixctl_cb_func lacp_unixctl_show;
    static unixctl_cb_func lacp_unixctl_show_stats;
    @@ -324,8 +325,8 @@ lacp_is_active(const struct lacp *lacp) OVS_EXCLUDED(mutex)
    /* Processes 'packet' which was received on 'slave_'.  This function should be
      * called on all packets received on 'slave_' with Ethernet Type ETH_TYPE_LACP.
      */
    -void
    -lacp_process_packet(struct lacp *lacp, const void *slave_,
    +bool
    +lacp_process_packet(struct lacp *lacp, const void *bond, const void *slave_,
                         const struct dp_packet *packet)
         OVS_EXCLUDED(mutex)
    {
    @@ -333,6 +334,7 @@ lacp_process_packet(struct lacp *lacp, const void *slave_,
         const struct lacp_pdu *pdu;
         long long int tx_rate;
         struct slave *slave;
    +    bool lacp_may_enable = false;
         lacp_lock();
         slave = slave_lookup(lacp, slave_);
    @@ -362,8 +364,16 @@ lacp_process_packet(struct lacp *lacp, const void *slave_,
             slave->partner = pdu->actor;
         }
    +    /*
    +     * Evaluate may_enable here to avoid dropping of packets till main thread
    +     * sets may_enable to true.
    +     */
    +    lacp_may_enable = slave_may_enable__(slave);
    +
    out:
         lacp_unlock();
    +
    +    return lacp_may_enable;
    }
    /* Returns the lacp_status of the given 'lacp' object (which may be NULL). */
    diff --git a/lib/lacp.h b/lib/lacp.h
    index f35cff5..1505c2c 100644
    --- a/lib/lacp.h
    +++ b/lib/lacp.h
    @@ -46,7 +46,8 @@ struct lacp *lacp_ref(const struct lacp *);
    void lacp_configure(struct lacp *, const struct lacp_settings *);
    bool lacp_is_active(const struct lacp *);
    -void lacp_process_packet(struct lacp *, const void *slave,
    +bool lacp_process_packet(struct lacp *, const void *bond,
    +                         const void *slave,
                              const struct dp_packet *packet);
    enum lacp_status lacp_status(const struct lacp *);
    diff --git a/ofproto/bond.c b/ofproto/bond.c
    index 11d28e1..7ca3687 100644
    --- a/ofproto/bond.c
    +++ b/ofproto/bond.c
    @@ -777,6 +777,7 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
    {
         enum bond_verdict verdict = BV_DROP;
         struct bond_slave *slave;
    +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
         ovs_rwlock_rdlock(&rwlock);
         slave = bond_slave_lookup(bond, slave_);
    @@ -794,7 +795,13 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
          * drop all incoming traffic except if lacp_fallback_ab is enabled. */
         switch (bond->lacp_status) {
         case LACP_NEGOTIATED:
    -        verdict = slave->enabled ? BV_ACCEPT : BV_DROP;
    +        /*
    +         * To reduce packet-drops due to delay in enabling of slave (post
    +         * LACP-SYNC), from main thread, check for may_enable as well.
    +         * When may_enable is TRUE, it means LACP is UP and waiting for
    +         * the main thread to run LACP state machine and enable the slave.
    +         */
    +        verdict = (slave->enabled || slave->may_enable) ? BV_ACCEPT : BV_DROP;
             goto out;
         case LACP_CONFIGURED:
             if (!bond->lacp_fallback_ab) {
    @@ -830,8 +837,6 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
             /* Drop all packets which arrive on backup slaves.  This is similar to
              * how Linux bonding handles active-backup bonds. */
             if (bond->active_slave != slave) {
    -            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
    -
                 VLOG_DBG_RL(&rl, "active-backup bond received packet on backup"
                             " slave (%s) destined for " ETH_ADDR_FMT,
                             slave->name, ETH_ADDR_ARGS(eth_dst));
    @@ -853,6 +858,13 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
         OVS_NOT_REACHED();
    out:
    +    if (slave && (verdict != BV_ACCEPT)) {
    +        VLOG_DBG_RL(&rl, "slave= %s actv-slave= %d may_enable %d enable %d "
    +                    "LACP %d verdict(A/D/M=0/1/2) %d\n", slave->name,
    +                    (bond->active_slave == slave), slave->may_enable,
    +                    slave->enabled, bond->lacp_status, verdict);
    +    }
    +
         ovs_rwlock_unlock(&rwlock);
         return verdict;
    diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
    index 5641724..81dcdb6 100644
    --- a/ofproto/ofproto-dpif-xlate.c
    +++ b/ofproto/ofproto-dpif-xlate.c
    @@ -3160,6 +3160,7 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport)
         const struct xbridge *xbridge = ctx->xbridge;
         const struct dp_packet *packet = ctx->xin->packet;
         enum slow_path_reason slow;
    +    bool lacp_may_enable;
         if (!xport) {
             slow = 0;
    @@ -3180,7 +3181,17 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport)
         } else if (xport->xbundle && xport->xbundle->lacp
                    && flow->dl_type == htons(ETH_TYPE_LACP)) {
             if (packet) {
    -            lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
    +            lacp_may_enable = lacp_process_packet(xport->xbundle->lacp,
    +                                                  xport->xbundle->bond,
    +                                                  xport->ofport, packet);
    +            /*
    +             * Update LACP status in bond-slave to avoid packet-drops until
    +             * LACP state machine is run by the main thread.
    +             */
    +            if (xport->xbundle->bond && lacp_may_enable) {
    +                bond_slave_set_may_enable(xport->xbundle->bond, xport->ofport,
    +                                          lacp_may_enable);
    +            }
             }
             slow = SLOW_LACP;
         } else if ((xbridge->stp || xbridge->rstp) &&
    --
    1.9.1
    
    _______________________________________________
    dev mailing list
    dev at openvswitch.org
    https://mail.openvswitch.org/mailman/listinfo/ovs-dev
    



More information about the dev mailing list