[ovs-dev] [arp-rw 12/13] dpif: Support working around actions that a datapath does not support.

Alex Wang alexw at nicira.com
Wed Oct 9 18:25:09 UTC 2013


Honestly speaking, this patch helps me understand more about how dpif works.

The changes all make sense and look good to me.  still want to discuss with
you about my understanding later, to see if i fully understand this patch.


On Mon, Sep 23, 2013 at 10:49 AM, Ben Pfaff <blp at nicira.com> wrote:

> Until now, OVS has expected that the datapath supports all the actions
> required by any flow to be installed.  There are at least two reasons why
> a datapath might not support a given action:
>
>     - The datapath version is older than the userspace version, and the
>       action was introduced after the version of the datapath in use.
>
>     - The action is not considered important enough to implement as part of
>       an ABI that must be maintained forever.
>
> This commit adds infrastructure to handle these cases.  It doesn't actually
> add any uses; that will come in an upcoming commit.
>
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
>  lib/dpif.c                    |  192
> ++++++++++++++++++++++++++++++++++-------
>  lib/dpif.h                    |   13 ++-
>  lib/odp-util.c                |   10 ++-
>  lib/odp-util.h                |   65 +++++++-------
>  ofproto/ofproto-dpif-upcall.c |    1 +
>  ofproto/ofproto-dpif-xlate.c  |   15 ++--
>  ofproto/ofproto-dpif.c        |    3 +-
>  7 files changed, 227 insertions(+), 72 deletions(-)
>
> diff --git a/lib/dpif.c b/lib/dpif.c
> index 02cc36a..c516f51 100644
> --- a/lib/dpif.c
> +++ b/lib/dpif.c
> @@ -28,6 +28,7 @@
>  #include "flow.h"
>  #include "netdev.h"
>  #include "netlink.h"
> +#include "odp-execute.h"
>  #include "odp-util.h"
>  #include "ofp-errors.h"
>  #include "ofp-print.h"
> @@ -55,6 +56,7 @@ COVERAGE_DEFINE(dpif_flow_query_list);
>  COVERAGE_DEFINE(dpif_flow_query_list_n);
>  COVERAGE_DEFINE(dpif_execute);
>  COVERAGE_DEFINE(dpif_purge);
> +COVERAGE_DEFINE(dpif_execute_with_help);
>
>  static const struct dpif_class *base_dpif_classes[] = {
>  #ifdef LINUX_DATAPATH
> @@ -1063,6 +1065,94 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump)
>      return dump->error == EOF ? 0 : dump->error;
>  }
>
> +struct dpif_execute_helper_aux {
> +    struct dpif *dpif;
> +    int error;
> +};
> +
> +static void
> +dpif_execute_helper_execute__(void *aux_, struct ofpbuf *packet,
> +                              const struct flow *flow,
> +                              const struct nlattr *actions, size_t
> actions_len)
> +{
> +    struct dpif_execute_helper_aux *aux = aux_;
> +    struct dpif_execute execute;
> +    struct odputil_keybuf key_stub;
> +    struct ofpbuf key;
> +    int error;
> +
> +    ofpbuf_use_stub(&key, &key_stub, sizeof key_stub);
> +    odp_flow_key_from_flow(&key, flow, flow->in_port.odp_port);
> +
> +    execute.key = key.data;
> +    execute.key_len = key.size;
> +    execute.actions = actions;
> +    execute.actions_len = actions_len;
> +    execute.packet = packet;
> +    execute.needs_help = false;
> +
> +    error = aux->dpif->dpif_class->execute(aux->dpif, &execute);
> +    if (error) {
> +        aux->error = error;
> +    }
> +}
> +
> +static void
> +dpif_execute_helper_output_cb(void *aux, struct ofpbuf *packet,
> +                              const struct flow *flow, odp_port_t
> out_port)
> +{
> +    uint64_t actions_stub[DIV_ROUND_UP(NL_A_U32_SIZE, 8)];
> +    struct ofpbuf actions;
> +
> +    ofpbuf_use_stack(&actions, actions_stub, sizeof actions_stub);
> +    nl_msg_put_u32(&actions, OVS_ACTION_ATTR_OUTPUT,
> odp_to_u32(out_port));
> +
> +    dpif_execute_helper_execute__(aux, packet, flow,
> +                                  actions.data, actions.size);
> +}
> +
> +static void
> +dpif_execute_helper_userspace_cb(void *aux, struct ofpbuf *packet,
> +                                 const struct flow *flow,
> +                                 const struct nlattr *action)
> +{
> +    dpif_execute_helper_execute__(aux, packet, flow,
> +                                  action, NLA_ALIGN(action->nla_len));
> +}
> +
> +/* Executes 'execute' by performing most of the actions in userspace and
> + * passing the fully constructed packets to 'dpif' for output and
> userspace
> + * actions.
> + *
> + * This helps with actions that a given 'dpif' doesn't implement
> directly. */
> +static int
> +dpif_execute_with_help(struct dpif *dpif, const struct dpif_execute
> *execute)
> +{
> +    struct dpif_execute_helper_aux aux;
> +    enum odp_key_fitness fit;
> +    struct ofpbuf *packet;
> +    struct flow flow;
> +
> +    COVERAGE_INC(dpif_execute_with_help);
> +
> +    fit = odp_flow_key_to_flow(execute->key, execute->key_len, &flow);
> +    if (fit == ODP_FIT_ERROR) {
> +        return EINVAL;
> +    }
> +
> +    aux.dpif = dpif;
> +    aux.error = 0;
> +
> +    packet = ofpbuf_clone_with_headroom(execute->packet, VLAN_HEADER_LEN);
> +    odp_execute_actions(&aux, packet, &flow,
> +                        execute->actions, execute->actions_len,
> +                        dpif_execute_helper_output_cb,
> +                        dpif_execute_helper_userspace_cb);
> +    ofpbuf_delete(packet);
> +
> +    return aux.error;
> +}
> +
>  static int
>  dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute)
>  {
> @@ -1070,7 +1160,9 @@ dpif_execute__(struct dpif *dpif, const struct
> dpif_execute *execute)
>
>      COVERAGE_INC(dpif_execute);
>      if (execute->actions_len > 0) {
> -        error = dpif->dpif_class->execute(dpif, execute);
> +        error = (execute->needs_help
> +                 ? dpif_execute_with_help(dpif, execute)
> +                 : dpif->dpif_class->execute(dpif, execute));
>      } else {
>          error = 0;
>      }
> @@ -1086,12 +1178,20 @@ dpif_execute__(struct dpif *dpif, const struct
> dpif_execute *execute)
>   * it contains some metadata that cannot be recovered from 'packet', such
> as
>   * tunnel and in_port.)
>   *
> + * Some dpif providers do not implement every action.  The Linux kernel
> + * datapath, in particular, does not implement ARP field modification.  If
> + * 'needs_help' is true, the dpif layer executes in userspace all of the
> + * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and
> + * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the
> dpif
> + * implementation.
> + *
>   * Returns 0 if successful, otherwise a positive errno value. */
>  int
>  dpif_execute(struct dpif *dpif,
>               const struct nlattr *key, size_t key_len,
>               const struct nlattr *actions, size_t actions_len,
> -             const struct ofpbuf *buf)
> +             const struct ofpbuf *buf,
> +             bool needs_help)
>  {
>      struct dpif_execute execute;
>
> @@ -1100,6 +1200,7 @@ dpif_execute(struct dpif *dpif,
>      execute.actions = actions;
>      execute.actions_len = actions_len;
>      execute.packet = buf;
> +    execute.needs_help = needs_help;
>      return dpif_execute__(dpif, &execute);
>  }
>
> @@ -1112,54 +1213,83 @@ dpif_execute(struct dpif *dpif,
>  void
>  dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
>  {
> -    size_t i;
> -
>      if (dpif->dpif_class->operate) {
> -        dpif->dpif_class->operate(dpif, ops, n_ops);
> +        while (n_ops > 0) {
> +            size_t chunk;
> +
> +            /* Count 'chunk', the number of ops that can be executed
> without
> +             * needing any help.  Ops that need help should be rare, so we
> +             * expect this to ordinarily be 'n_ops', that is, all the
> ops. */
> +            for (chunk = 0; chunk < n_ops; chunk++) {
> +                struct dpif_op *op = ops[chunk];
> +
> +                if (op->type == DPIF_OP_EXECUTE &&
> op->u.execute.needs_help) {
> +                    break;
> +                }
> +            }
> +
> +            if (chunk) {
> +                /* Execute a chunk full of ops that the dpif provider can
> +                 * handle itself, without help. */
> +                size_t i;
> +
> +                dpif->dpif_class->operate(dpif, ops, chunk);
> +
> +                for (i = 0; i < chunk; i++) {
> +                    struct dpif_op *op = ops[i];
> +
> +                    switch (op->type) {
> +                    case DPIF_OP_FLOW_PUT:
> +                        log_flow_put_message(dpif, &op->u.flow_put,
> op->error);
> +                        break;
> +
> +                    case DPIF_OP_FLOW_DEL:
> +                        log_flow_del_message(dpif, &op->u.flow_del,
> op->error);
> +                        break;
> +
> +                    case DPIF_OP_EXECUTE:
> +                        log_execute_message(dpif, &op->u.execute,
> op->error);
> +                        break;
> +                    }
> +                }
> +
> +                ops += chunk;
> +                n_ops -= chunk;
> +            } else {
> +                /* Help the dpif provider to execute one op. */
> +                struct dpif_op *op = ops[0];
> +
> +                op->error = dpif_execute__(dpif, &op->u.execute);
> +                ops++;
> +                n_ops--;
> +            }
> +        }
> +    } else {
> +        size_t i;
>
>          for (i = 0; i < n_ops; i++) {
>              struct dpif_op *op = ops[i];
>
>              switch (op->type) {
>              case DPIF_OP_FLOW_PUT:
> -                log_flow_put_message(dpif, &op->u.flow_put, op->error);
> +                op->error = dpif_flow_put__(dpif, &op->u.flow_put);
>                  break;
>
>              case DPIF_OP_FLOW_DEL:
> -                log_flow_del_message(dpif, &op->u.flow_del, op->error);
> +                op->error = dpif_flow_del__(dpif, &op->u.flow_del);
>                  break;
>
>              case DPIF_OP_EXECUTE:
> -                log_execute_message(dpif, &op->u.execute, op->error);
> +                op->error = dpif_execute__(dpif, &op->u.execute);
>                  break;
> -            }
> -        }
> -        return;
> -    }
> -
> -    for (i = 0; i < n_ops; i++) {
> -        struct dpif_op *op = ops[i];
> -
> -        switch (op->type) {
> -        case DPIF_OP_FLOW_PUT:
> -            op->error = dpif_flow_put__(dpif, &op->u.flow_put);
> -            break;
> -
> -        case DPIF_OP_FLOW_DEL:
> -            op->error = dpif_flow_del__(dpif, &op->u.flow_del);
> -            break;
>
> -        case DPIF_OP_EXECUTE:
> -            op->error = dpif_execute__(dpif, &op->u.execute);
> -            break;
> -
> -        default:
> -            NOT_REACHED();
> +            default:
> +                NOT_REACHED();
> +            }
>          }
>      }
>  }
>
> -
>  /* Returns a string that represents 'type', for use in log messages. */
>  const char *
>  dpif_upcall_type_to_string(enum dpif_upcall_type type)
> diff --git a/lib/dpif.h b/lib/dpif.h
> index 7a258c7..8996c0a 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -493,7 +493,8 @@ int dpif_flow_dump_done(struct dpif_flow_dump *);
>  int dpif_execute(struct dpif *,
>                   const struct nlattr *key, size_t key_len,
>                   const struct nlattr *actions, size_t actions_len,
> -                 const struct ofpbuf *);
> +                 const struct ofpbuf *,
> +                 bool needs_help);
>
>  /* Operation batching interface.
>   *
> @@ -531,11 +532,21 @@ struct dpif_flow_del {
>  };
>
>  struct dpif_execute {
> +    /* Raw support for execute passed along to the provider. */
>      const struct nlattr *key;       /* Partial flow key (only for
> metadata). */
>      size_t key_len;                 /* Length of 'key' in bytes. */
>      const struct nlattr *actions;   /* Actions to execute on packet. */
>      size_t actions_len;             /* Length of 'actions' in bytes. */
>      const struct ofpbuf *packet;    /* Packet to execute. */
> +
> +    /* Some dpif providers do not implement every action.  The Linux
> kernel
> +     * datapath, in particular, does not implement ARP field modification.
> +     *
> +     * If this member is set to true, the dpif layer executes in
> userspace all
> +     * of the actions that it can, and for OVS_ACTION_ATTR_OUTPUT and
> +     * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to
> the
> +     * dpif implementation. */
> +    bool needs_help;
>  };
>
>  struct dpif_op {
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index 0edbbf0..b317cc2 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -3543,13 +3543,17 @@ commit_set_pkt_mark_action(const struct flow
> *flow, struct flow *base,
>
>      odp_put_pkt_mark_action(base->pkt_mark, odp_actions);
>  }
> +
>  /* If any of the flow key data that ODP actions can modify are different
> in
>   * 'base' and 'flow', appends ODP actions to 'odp_actions' that change
> the flow
>   * key from 'base' into 'flow', and then changes 'base' the same way.
>  Does not
>   * commit set_tunnel actions.  Users should call
> commit_odp_tunnel_action()
>   * in addition to this function if needed.  Sets fields in 'wc' that are
> - * used as part of the action. */
> -void
> + * used as part of the action.
> + *
> + * Returns a reason to force processing the flow's packets into the
> userspace
> + * slow path, if there is one, otherwise 0. */
> +enum slow_path_reason
>  commit_odp_actions(const struct flow *flow, struct flow *base,
>                     struct ofpbuf *odp_actions, struct flow_wildcards *wc)
>  {
> @@ -3564,4 +3568,6 @@ commit_odp_actions(const struct flow *flow, struct
> flow *base,
>      commit_mpls_action(flow, base, odp_actions, wc);
>      commit_set_priority_action(flow, base, odp_actions, wc);
>      commit_set_pkt_mark_action(flow, base, odp_actions, wc);
> +
> +    return 0;
>  }
> diff --git a/lib/odp-util.h b/lib/odp-util.h
> index 6e06f7f..cba43d5 100644
> --- a/lib/odp-util.h
> +++ b/lib/odp-util.h
> @@ -34,6 +34,36 @@ struct nlattr;
>  struct ofpbuf;
>  struct simap;
>
> +#define SLOW_PATH_REASONS                                               \
> +    /* These reasons are mutually exclusive. */                         \
> +    SPR(SLOW_CFM,        "cfm",        "Consists of CFM packets")       \
> +    SPR(SLOW_BFD,        "bfd",        "Consists of BFD packets")       \
> +    SPR(SLOW_LACP,       "lacp",       "Consists of LACP packets")      \
> +    SPR(SLOW_STP,        "stp",        "Consists of STP packets")       \
> +    SPR(SLOW_CONTROLLER, "controller",                                  \
> +        "Sends \"packet-in\" messages to the OpenFlow controller")      \
> +    SPR(SLOW_ACTION,     "action",                                      \
> +        "Uses action(s) not supported by datapath")
> +
> +/* Indexes for slow-path reasons.  Client code uses "enum
> slow_path_reason"
> + * values instead of these, these are just a way to construct those. */
> +enum {
> +#define SPR(ENUM, STRING, EXPLANATION) ENUM##_INDEX,
> +    SLOW_PATH_REASONS
> +#undef SPR
> +};
> +
> +/* Reasons why a subfacet might not be fast-pathable.
> + *
> + * Each reason is a separate bit to allow reasons to be combined. */
> +enum slow_path_reason {
> +#define SPR(ENUM, STRING, EXPLANATION) ENUM = 1 << ENUM##_INDEX,
> +    SLOW_PATH_REASONS
> +#undef SPR
> +};
> +
> +const char *slow_path_reason_to_explanation(enum slow_path_reason);
> +
>  #define ODPP_LOCAL ODP_PORT_C(OVSP_LOCAL)
>  #define ODPP_NONE  ODP_PORT_C(UINT32_MAX)
>
> @@ -129,9 +159,10 @@ const char *odp_key_fitness_to_string(enum
> odp_key_fitness);
>
>  void commit_odp_tunnel_action(const struct flow *, struct flow *base,
>                                struct ofpbuf *odp_actions);
> -void commit_odp_actions(const struct flow *, struct flow *base,
> -                        struct ofpbuf *odp_actions,
> -                        struct flow_wildcards *wc);
> +enum slow_path_reason commit_odp_actions(const struct flow *,
> +                                         struct flow *base,
> +                                         struct ofpbuf *odp_actions,
> +                                         struct flow_wildcards *wc);
>
>  /* ofproto-dpif interface.
>   *
> @@ -187,32 +218,4 @@ void odp_put_tunnel_action(const struct flow_tnl
> *tunnel,
>  void odp_put_pkt_mark_action(const uint32_t pkt_mark,
>                               struct ofpbuf *odp_actions);
>
> -#define SLOW_PATH_REASONS                                               \
> -    /* These reasons are mutually exclusive. */                         \
> -    SPR(SLOW_CFM,        "cfm",        "Consists of CFM packets")       \
> -    SPR(SLOW_BFD,        "bfd",        "Consists of BFD packets")       \
> -    SPR(SLOW_LACP,       "lacp",       "Consists of LACP packets")      \
> -    SPR(SLOW_STP,        "stp",        "Consists of STP packets")       \
> -    SPR(SLOW_CONTROLLER, "controller",                                  \
> -        "Sends \"packet-in\" messages to the OpenFlow controller")
> -
> -/* Indexes for slow-path reasons.  Client code uses "enum
> slow_path_reason"
> - * values instead of these, these are just a way to construct those. */
> -enum {
> -#define SPR(ENUM, STRING, EXPLANATION) ENUM##_INDEX,
> -    SLOW_PATH_REASONS
> -#undef SPR
> -};
> -
> -/* Reasons why a subfacet might not be fast-pathable.
> - *
> - * Each reason is a separate bit to allow reasons to be combined. */
> -enum slow_path_reason {
> -#define SPR(ENUM, STRING, EXPLANATION) ENUM = 1 << ENUM##_INDEX,
> -    SLOW_PATH_REASONS
> -#undef SPR
> -};
> -
> -const char *slow_path_reason_to_explanation(enum slow_path_reason);
> -
>  #endif /* odp-util.h */
> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> index 69ce5af..757f535 100644
> --- a/ofproto/ofproto-dpif-upcall.c
> +++ b/ofproto/ofproto-dpif-upcall.c
> @@ -781,6 +781,7 @@ handle_miss_upcalls(struct udpif *udpif, struct list
> *upcalls)
>              op->u.execute.packet = packet;
>              op->u.execute.actions = miss->xout.odp_actions.data;
>              op->u.execute.actions_len = miss->xout.odp_actions.size;
> +            op->u.execute.needs_help = (miss->xout.slow & SLOW_ACTION) !=
> 0;
>          }
>      }
>
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index a38a33b..bab06e6 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -1644,8 +1644,9 @@ compose_output_action__(struct xlate_ctx *ctx,
> ofp_port_t ofp_port,
>      }
>
>      if (out_port != ODPP_NONE) {
> -        commit_odp_actions(flow, &ctx->base_flow,
> -                           &ctx->xout->odp_actions, &ctx->xout->wc);
> +        ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow,
> +                                              &ctx->xout->odp_actions,
> +                                              &ctx->xout->wc);
>          nl_msg_put_odp_port(&ctx->xout->odp_actions,
> OVS_ACTION_ATTR_OUTPUT,
>                              out_port);
>
> @@ -1798,8 +1799,9 @@ execute_controller_action(struct xlate_ctx *ctx, int
> len,
>      key.pkt_mark = 0;
>      memset(&key.tunnel, 0, sizeof key.tunnel);
>
> -    commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
> -                       &ctx->xout->odp_actions, &ctx->xout->wc);
> +    ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow,
> &ctx->base_flow,
> +                                          &ctx->xout->odp_actions,
> +                                          &ctx->xout->wc);
>
>      odp_execute_actions(NULL, packet, &key, ctx->xout->odp_actions.data,
>                          ctx->xout->odp_actions.size, NULL, NULL);
> @@ -2132,8 +2134,9 @@ xlate_sample_action(struct xlate_ctx *ctx,
>     * the same percentage. */
>    uint32_t probability = (os->probability << 16) | os->probability;
>
> -  commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
> -                     &ctx->xout->odp_actions, &ctx->xout->wc);
> +  ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
> +                                        &ctx->xout->odp_actions,
> +                                        &ctx->xout->wc);
>
>    compose_flow_sample_cookie(os->probability, os->collector_set_id,
>                               os->obs_domain_id, os->obs_point_id,
> &cookie);
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 01e6881..1f85fb3 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -3992,7 +3992,8 @@ execute_actions(struct ofproto *ofproto_, const
> struct flow *flow,
>      odp_flow_key_from_flow(&key, flow, ofp_port_to_odp_port(ofproto,
> in_port));
>
>      error = dpif_execute(ofproto->backer->dpif, key.data, key.size,
> -                         xout.odp_actions.data, xout.odp_actions.size,
> packet);
> +                         xout.odp_actions.data, xout.odp_actions.size,
> packet,
> +                         (xout.slow & SLOW_ACTION) != 0);
>      xlate_out_uninit(&xout);
>
>      return error;
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20131009/33f333fe/attachment-0003.html>


More information about the dev mailing list