[ovs-dev] [PATCH 2/2] cfm: Expose detailed fault status in the database.

Ethan Jackson ethan at nicira.com
Thu Feb 9 02:07:45 UTC 2012


It's worth noting that the changes in this version of the patch make
patch 2/3 of the original series unnecessary.  I think it's a cleanup
though so I'm inclined to keep it unless there is debate on the issue.

Ethan

On Wed, Feb 8, 2012 at 17:29, Ethan Jackson <ethan at nicira.com> wrote:
> The cfm_fault column of the database is the logical OR of a number
> of reasons that CFM can be in a faulted state.  A controller may
> want to have more specific information in which case it can look at
> the cfm_fault_status column which this patch adds.
>
> Signed-off-by: Ethan Jackson <ethan at nicira.com>
> ---
>  lib/cfm.c                  |   78 +++++++++++++++++++++++++++++++++-----------
>  lib/cfm.h                  |   25 +++++++++++++-
>  ofproto/ofproto-provider.h |    7 ++--
>  ofproto/ofproto.c          |    7 ++--
>  vswitchd/bridge.c          |   15 ++++++++
>  vswitchd/vswitch.ovsschema |    6 ++-
>  vswitchd/vswitch.xml       |   37 +++++++++++++++++++++
>  7 files changed, 147 insertions(+), 28 deletions(-)
>
> diff --git a/lib/cfm.c b/lib/cfm.c
> index 537eeaa..d373f42 100644
> --- a/lib/cfm.c
> +++ b/lib/cfm.c
> @@ -85,8 +85,8 @@ struct cfm {
>
>     uint64_t mpid;
>     bool extended;         /* Extended mode. */
> -    bool fault;            /* Indicates connectivity fault. */
> -    bool unexpected_recv;  /* Received an unexpected CCM. */
> +    int fault;             /* Connectivity fault status. */
> +    int recv_fault;        /* Bit mask of faults occuring on receive. */
>     bool opup;             /* Operational State. */
>     bool remote_opup;      /* Remote Operational State. */
>
> @@ -136,6 +136,36 @@ cfm_ccm_addr(const struct cfm *cfm)
>     return cfm->extended ? eth_addr_ccm_x : eth_addr_ccm;
>  }
>
> +/* Returns the string representation of the given cfm_fault_reason 'reason'. */
> +const char *
> +cfm_fault_reason_to_str(int reason) {
> +    switch (reason) {
> +#define CFM_FAULT_REASON(NAME, STR) case CFM_FAULT_##NAME: return #STR;
> +        CFM_FAULT_REASONS
> +#undef CFM_FAULT_REASON
> +    default: return "<unknown>";
> +    }
> +}
> +
> +static void
> +ds_put_cfm_fault(struct ds *ds, int fault)
> +{
> +    size_t length = ds->length;
> +    int i;
> +
> +    for (i = 0; i < CFM_FAULT_N_REASONS; i++) {
> +        int reason = 1 << i;
> +
> +        if (fault & reason) {
> +            ds_put_format(ds, "%s ", cfm_fault_reason_to_str(reason));
> +        }
> +    }
> +
> +    if (ds->length > length) {
> +        ds_truncate(ds, ds->length - 1);
> +    }
> +}
> +
>  static void
>  cfm_generate_maid(struct cfm *cfm)
>  {
> @@ -291,8 +321,8 @@ cfm_run(struct cfm *cfm)
>         struct remote_mp *rmp, *rmp_next;
>         bool old_cfm_fault = cfm->fault;
>
> -        cfm->fault = cfm->unexpected_recv;
> -        cfm->unexpected_recv = false;
> +        cfm->fault = cfm->recv_fault;
> +        cfm->recv_fault = 0;
>
>         cfm->rmps_array_len = 0;
>         free(cfm->rmps_array);
> @@ -313,13 +343,13 @@ cfm_run(struct cfm *cfm)
>                 if (rmp->mpid == cfm->mpid) {
>                     VLOG_WARN_RL(&rl,"%s: received CCM with local MPID"
>                                  " %"PRIu64, cfm->name, rmp->mpid);
> -                    cfm->fault = true;
> +                    cfm->fault |= CFM_FAULT_LOOPBACK;
>                 }
>
>                 if (rmp->rdi) {
>                     VLOG_DBG("%s: RDI bit flagged from RMP %"PRIu64, cfm->name,
>                              rmp->mpid);
> -                    cfm->fault = true;
> +                    cfm->fault |= CFM_FAULT_RDI;
>                 }
>
>                 if (!rmp->opup) {
> @@ -331,12 +361,16 @@ cfm_run(struct cfm *cfm)
>         }
>
>         if (hmap_is_empty(&cfm->remote_mps)) {
> -            cfm->fault = true;
> +            cfm->fault |= CFM_FAULT_RECV;
>         }
>
>         if (old_cfm_fault != cfm->fault) {
> -            VLOG_INFO_RL(&rl, "%s: CFM fault status changed to %s",
> -                         cfm->name, cfm->fault ? "true" : "false");
> +            struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +            ds_put_cfm_fault(&ds, cfm->fault);
> +            VLOG_INFO_RL(&rl, "%s: CFM fault status changed: %s", cfm->name,
> +                         ds_cstr_ro(&ds));
> +            ds_destroy(&ds);
>         }
>
>         timer_set_duration(&cfm->fault_timer, interval);
> @@ -481,7 +515,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
>      * bonds. Furthermore, faults can be maliciously triggered by crafting
>      * invalid CCMs. */
>     if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) {
> -        cfm->unexpected_recv = true;
> +        cfm->recv_fault |= CFM_FAULT_MAID;
>         VLOG_WARN_RL(&rl, "%s: Received unexpected remote MAID from MAC "
>                      ETH_ADDR_FMT, cfm->name, ETH_ADDR_ARGS(eth->eth_src));
>     } else {
> @@ -522,7 +556,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
>                 rmp = xzalloc(sizeof *rmp);
>                 hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid));
>             } else {
> -                cfm->unexpected_recv = true;
> +                cfm->recv_fault |= CFM_FAULT_OVERFLOW;
>                 VLOG_WARN_RL(&rl,
>                              "%s: dropped CCM with MPID %"PRIu64" from MAC "
>                              ETH_ADDR_FMT, cfm->name, ccm_mpid,
> @@ -551,13 +585,14 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
>     }
>  }
>
> -/* Gets the fault status of 'cfm'.  Returns true when 'cfm' has detected
> - * connectivity problems, false otherwise. */
> -bool
> +/* Gets the fault status of 'cfm'.  Returns a bit mask of 'cfm_fault_reason's
> + * indicating the cause of the connectivity fault, or zero if there is no
> + * fault. */
> +int
>  cfm_get_fault(const struct cfm *cfm)
>  {
>     if (cfm->fault_override >= 0) {
> -        return cfm->fault_override;
> +        return cfm->fault_override ? CFM_FAULT_OVERRIDE : 0;
>     }
>     return cfm->fault;
>  }
> @@ -602,11 +637,16 @@ cfm_print_details(struct ds *ds, const struct cfm *cfm)
>     struct remote_mp *rmp;
>
>     ds_put_format(ds, "---- %s ----\n", cfm->name);
> -    ds_put_format(ds, "MPID %"PRIu64":%s%s%s%s\n", cfm->mpid,
> +    ds_put_format(ds, "MPID %"PRIu64":%s%s\n", cfm->mpid,
>                   cfm->extended ? " extended" : "",
> -                  cfm_get_fault(cfm) ? " fault" : "",
> -                  cfm->fault_override >= 0 ? " fault_override" : "",
> -                  cfm->unexpected_recv ? " unexpected_recv" : "");
> +                  cfm->fault_override >= 0 ? " fault_override" : "");
> +
> +
> +    if (cfm_get_fault(cfm)) {
> +        ds_put_cstr(ds, "\tfault: ");
> +        ds_put_cfm_fault(ds, cfm_get_fault(cfm));
> +        ds_put_cstr(ds, "\n");
> +    }
>
>     ds_put_format(ds, "\topstate: %s\n", cfm->opup ? "up" : "down");
>     ds_put_format(ds, "\tremote_opstate: %s\n",
> diff --git a/lib/cfm.h b/lib/cfm.h
> index 5106a51..6d23293 100644
> --- a/lib/cfm.h
> +++ b/lib/cfm.h
> @@ -24,6 +24,28 @@
>  struct flow;
>  struct ofpbuf;
>
> +#define CFM_FAULT_REASONS                  \
> +    CFM_FAULT_REASON(RECV, recv)           \
> +    CFM_FAULT_REASON(RDI, rdi)             \
> +    CFM_FAULT_REASON(MAID, maid)           \
> +    CFM_FAULT_REASON(LOOPBACK, loopback)   \
> +    CFM_FAULT_REASON(OVERFLOW, overflow)   \
> +    CFM_FAULT_REASON(OVERRIDE, override)
> +
> +enum cfm_fault_bit_index {
> +#define CFM_FAULT_REASON(NAME, STR) CFM_FAULT_INDEX_##NAME,
> +    CFM_FAULT_REASONS
> +#undef CFM_FAULT_REASON
> +    CFM_FAULT_N_REASONS
> +};
> +
> +enum cfm_fault_reason {
> +#define CFM_FAULT_REASON(NAME, STR) \
> +    CFM_FAULT_##NAME = 1 << CFM_FAULT_INDEX_##NAME,
> +    CFM_FAULT_REASONS
> +#undef CFM_FAULT_REASON
> +};
> +
>  struct cfm_settings {
>     uint64_t mpid;              /* The MPID of this CFM. */
>     int interval;               /* The requested transmission interval. */
> @@ -43,9 +65,10 @@ void cfm_wait(struct cfm *);
>  bool cfm_configure(struct cfm *, const struct cfm_settings *);
>  bool cfm_should_process_flow(const struct cfm *cfm, const struct flow *);
>  void cfm_process_heartbeat(struct cfm *, const struct ofpbuf *packet);
> -bool cfm_get_fault(const struct cfm *);
> +int cfm_get_fault(const struct cfm *);
>  bool cfm_get_opup(const struct cfm *);
>  void cfm_get_remote_mpids(const struct cfm *, const uint64_t **rmps,
>                           size_t *n_rmps);
> +const char *cfm_fault_reason_to_str(int fault);
>
>  #endif /* cfm.h */
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index cb97188..3927a2e 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -961,9 +961,10 @@ struct ofproto_class {
>      * support CFM, as does a null pointer. */
>     int (*set_cfm)(struct ofport *ofport, const struct cfm_settings *s);
>
> -    /* Checks the fault status of CFM configured on 'ofport'.  Returns 1 if CFM
> -     * is faulted (generally indicating a connectivity problem), 0 if CFM is
> -     * not faulted, or -1 if CFM is not enabled on 'port'
> +    /* Checks the fault status of CFM configured on 'ofport'.  Returns a
> +     * bitmask of 'cfm_fault_reason's to indicate a CFM fault (generally
> +     * indicating a connectivity problem).  Returns zero if CFM is not faulted,
> +     * and -1 if CFM is not enabled on 'port'.
>      *
>      * This function may be a null pointer if the ofproto implementation does
>      * not support CFM. */
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index c35d440..64d19d7 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -2435,9 +2435,10 @@ ofproto_get_netflow_ids(const struct ofproto *ofproto,
>     ofproto->ofproto_class->get_netflow_ids(ofproto, engine_type, engine_id);
>  }
>
> -/* Checks the fault status of CFM for 'ofp_port' within 'ofproto'.  Returns 1
> - * if CFM is faulted (generally indiciating a connectivity problem), 0 if CFM
> - * is not faulted, and -1 if CFM is not enabled on 'ofp_port'. */
> +/* Checks the fault status of CFM for 'ofp_port' within 'ofproto'.  Returns a
> + * bitmask of 'cfm_fault_reason's to indicate a CFM fault (generally
> + * indicating a connectivity problem).  Returns zero if CFM is not faulted,
> + * and -1 if CFM is not enabled on 'port'. */
>  int
>  ofproto_port_get_cfm_fault(const struct ofproto *ofproto, uint16_t ofp_port)
>  {
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index c175c58..15fb632 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -285,6 +285,7 @@ bridge_init(const char *remote)
>     ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_statistics);
>     ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_status);
>     ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_fault);
> +    ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_fault_status);
>     ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_mpids);
>     ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_lacp_current);
>     ovsdb_idl_omit(idl, &ovsrec_interface_col_external_ids);
> @@ -1554,10 +1555,23 @@ iface_refresh_cfm_stats(struct iface *iface)
>     fault = ofproto_port_get_cfm_fault(iface->port->bridge->ofproto,
>                                        iface->ofp_port);
>     if (fault >= 0) {
> +        const char *reasons[CFM_FAULT_N_REASONS];
>         bool fault_bool = fault;
> +        size_t i, j;
> +
> +        j = 0;
> +        for (i = 0; i < CFM_FAULT_N_REASONS; i++) {
> +            int reason = 1 << i;
> +            if (fault & reason) {
> +                reasons[j++] = cfm_fault_reason_to_str(reason);
> +            }
> +        }
> +
>         ovsrec_interface_set_cfm_fault(cfg, &fault_bool, 1);
> +        ovsrec_interface_set_cfm_fault_status(cfg, (char **) reasons, j);
>     } else {
>         ovsrec_interface_set_cfm_fault(cfg, NULL, 0);
> +        ovsrec_interface_set_cfm_fault_status(cfg, NULL, 0);
>     }
>
>     error = ofproto_port_get_cfm_remote_mpids(iface->port->bridge->ofproto,
> @@ -3046,6 +3060,7 @@ iface_clear_db_record(const struct ovsrec_interface *if_cfg)
>         ovsrec_interface_set_link_state(if_cfg, NULL);
>         ovsrec_interface_set_mtu(if_cfg, NULL, 0);
>         ovsrec_interface_set_cfm_fault(if_cfg, NULL, 0);
> +        ovsrec_interface_set_cfm_fault_status(if_cfg, NULL, 0);
>         ovsrec_interface_set_cfm_remote_mpids(if_cfg, NULL, 0);
>         ovsrec_interface_set_lacp_current(if_cfg, NULL, 0);
>         ovsrec_interface_set_statistics(if_cfg, NULL, NULL, 0);
> diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
> index 6f2c458..c260c43 100644
> --- a/vswitchd/vswitch.ovsschema
> +++ b/vswitchd/vswitch.ovsschema
> @@ -1,6 +1,6 @@
>  {"name": "Open_vSwitch",
> - "version": "6.5.0",
> - "cksum": "2847700438 16419",
> + "version": "6.6.0",
> + "cksum": "3676036878 16515",
>  "tables": {
>    "Open_vSwitch": {
>      "columns": {
> @@ -205,6 +205,8 @@
>            "min": 0,
>            "max": 1},
>          "ephemeral": true},
> +       "cfm_fault_status": {
> +         "type": {"key": "string", "min": 0, "max": "unlimited"}},
>        "lacp_current": {
>          "type": {"key": {"type": "boolean"},
>                   "min": 0, "max": 1},
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 7be7891..9e38d7f 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -1699,6 +1699,43 @@
>         </p>
>       </column>
>
> +      <column name="cfm_fault_status" key="recv">
> +        Indicates a CFM fault was triggered due to a lack of CCMs received on
> +        the <ref table="Interface"/>.
> +      </column>
> +
> +      <column name="cfm_fault_status" key="rdi">
> +        Indicates a CFM fault was triggered due to the reception of a CCM with
> +        the RDI bit flagged.  Endpoints set the RDI bit in their CCMs when they
> +        are not receiving CCMs themselves.  This typically indicates a
> +        unidirectional connectivity failure.
> +      </column>
> +
> +      <column name="cfm_fault_status" key="maid">
> +        Indicates a CFM fault was triggered due to the reception of a CCM with
> +        a MAID other than the one Open vSwitch uses.  CFM broadcasts are tagged
> +        with an identification number in addition to the MPID called the MAID.
> +        Open vSwitch only supports receiving CCM broadcasts tagged with the
> +        MAID it uses internally.
> +      </column>
> +
> +      <column name="cfm_fault_status" key="loopback">
> +        Indicates a CFM fault was triggered due to the reception of a CCM
> +        advertising the same MPID configured in the <ref column="cfm_mpid"/>
> +        column of this <ref table="Interface"/>.  This may indicate a loop in
> +        the network.
> +      </column>
> +
> +      <column name="cfm_fault_status" key="overflow">
> +        Indicates a CFM fault was triggered because the CFM module received
> +        CCMs from more remote endpoints than it can keep track of.
> +      </column>
> +
> +      <column name="cfm_fault_status" key="override">
> +        Indicates a CFM fault was manually triggered by an administrator using
> +        an <code>ovs-appctl</code> command.
> +      </column>
> +
>       <column name="cfm_remote_mpids">
>         When CFM is properly configured, Open vSwitch will occasionally
>         receive CCM broadcasts.  These broadcasts contain the MPID of the
> --
> 1.7.9
>



More information about the dev mailing list