[ovs-dev] [PATCH v10 4/4] Add packet recirculation

Rajahalme, Jarno (NSN - FI/Espoo) jarno.rajahalme at nsn.com
Fri May 24 08:43:59 UTC 2013


Simon,

Here is some initial review of the ofproto-dpif.c changes,

  Jarno

On May 24, 2013, at 10:18 , ext Simon Horman wrote:

> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 49f0270..8b1ccac 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -43,6 +43,7 @@
> #include "odp-util.h"
> #include "ofp-util.h"
> #include "ofpbuf.h"
> +#include "odp-execute.h"
> #include "ofp-actions.h"
> #include "ofp-parse.h"
> #include "ofp-print.h"
> @@ -121,7 +122,8 @@ static struct rule_dpif *rule_dpif_miss_rule(struct ofproto_dpif *ofproto,
> 
> static void rule_credit_stats(struct rule_dpif *,
>                               const struct dpif_flow_stats *);
> -static void flow_push_stats(struct facet *, const struct dpif_flow_stats *);
> +static void flow_push_stats(struct facet *, const struct dpif_flow_stats *,
> +                            const struct ofpact *, size_t ofpacts_len);
> static tag_type rule_calculate_tag(const struct flow *,
>                                    const struct minimask *, uint32_t basis);
> static void rule_invalidate(const struct rule_dpif *);
> @@ -289,6 +291,17 @@ struct action_xlate_ctx {
>     uint16_t nf_output_iface;   /* Output interface index for NetFlow. */
>     mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */
> 
> +    size_t ofpacts_len;         /* The number of bytes of the ofpacts
> +                                 * argument to xlate_actions() processed
> +                                 * by it. This is used to calculate an
> +                                 * offset into ofpacts for calls to
> +                                 * xlate_actions on recirculated packets */
> +
> +    uint32_t recirculation_id;  /* skb_mark to use to identify
> +                                 * recirculation. */

If this was 0 by default, and set to a valid recirculation_id when recirculation is to take place, then you might get rid of the bool recirculated?

> +    bool recirculated;          /* True if the context does not add a
> +                                 * recirculate action. False otherwise. */
> +

The comment seems the reverse of the intended meaning. If not then need to elaborate to make this clearer.

> /* xlate_actions() initializes and uses these members, but the client has no
>  * reason to look at them. */
> 
> @@ -321,7 +334,8 @@ static void action_xlate_ctx_init(struct action_xlate_ctx *,
>                                   struct ofproto_dpif *, const struct flow *,
>                                   const struct initial_vals *initial_vals,
>                                   struct rule_dpif *,
> -                                  uint8_t tcp_flags, const struct ofpbuf *);
> +                                  uint8_t tcp_flags, const struct ofpbuf *,
> +                                  uint32_t recirculation_id);
> static void xlate_actions(struct action_xlate_ctx *,
>                           const struct ofpact *ofpacts, size_t ofpacts_len,
>                           struct ofpbuf *odp_actions);
> @@ -504,17 +518,46 @@ struct facet {
>     struct subfacet one_subfacet;
> 
>     long long int learn_rl;      /* Rate limiter for facet_learn(). */
> +
> +    const struct ofpact *ofpacts;   /* ofpacts for this facet.
> +                                     * Will differ from rule->up.ofpacts
> +                                     * if facet is for a recirculated packet. */
> +    size_t ofpacts_len;             /* ofpacts_len for this facet
> +                                     * Will differ from * rule->up.ofpacts_len
> +                                     * if facet is for a recirculated packet. */
> +
> +    uint32_t recirculation_id;       /* Recirculation id.
> +                                      * Non-sero for a facet

s/sero/zero/

> +                                      * that recirculates packets;
> +                                      * used as the value of flow.skb_mark
> +                                      * in the facet of recirculated packets.
> +                                      * Zero otherwise. */
> +    struct hmap_node recirculation_id_hmap_node;
> +                                    /* In owning ofproto's 'recirculation_id'

s/'recirculation_id'/'recirculation_ids'/

> +                                     * hmap. */
> +    const struct ofpact *recirculation_ofpacts;
> +                                    /* ofpacts for facets of packets
> +                                     * recirculated by this facet */
> +    size_t recirculation_ofpacts_len;
> +                                    /* ofpacts_len for facets of packets
> +                                     * recirculated by this facet */
> +
> +    bool recirculated;              /* Facet of a recirculated packet? */

Is this flag needed, as recirculation_id is already non-zero for facets that recirculate packets?

> };
> 
> -static struct facet *facet_create(struct rule_dpif *,
> -                                  const struct flow *, uint32_t hash);
> +static struct facet *facet_create(struct rule_dpif *, const struct flow *,
> +                                  const struct ofpact *, size_t ofpacts_len,
> +                                  bool recirculated, uint32_t hash);
> static void facet_remove(struct facet *);
> static void facet_free(struct facet *);
> 
> +static struct facet *facet_find_by_id(struct ofproto_dpif *, uint32_t id);
> static struct facet *facet_find(struct ofproto_dpif *,
>                                 const struct flow *, uint32_t hash);
> static struct facet *facet_lookup_valid(struct ofproto_dpif *,
>                                         const struct flow *, uint32_t hash);
> +static struct facet *facet_lookup_valid_by_id(struct ofproto_dpif *,
> +                                              uint32_t id);
> static void facet_revalidate(struct facet *);
> static bool facet_check_consistency(struct facet *);
> 
> @@ -714,6 +757,7 @@ struct ofproto_dpif {
> 
>     /* Facets. */
>     struct hmap facets;
> +    struct hmap recirculation_ids;
>     struct hmap subfacets;
>     struct governor *governor;
>     long long int consistency_rl;
> @@ -1373,6 +1417,7 @@ construct(struct ofproto *ofproto_)
>     ofproto->has_bonded_bundles = false;
> 
>     hmap_init(&ofproto->facets);
> +    hmap_init(&ofproto->recirculation_ids);
>     hmap_init(&ofproto->subfacets);
>     ofproto->governor = NULL;
>     ofproto->consistency_rl = LLONG_MIN;
> @@ -3484,6 +3529,58 @@ port_is_lacp_current(const struct ofport *ofport_)
>             : -1);
> }
> 
> +/* Recirculation Id */
> +#define RECIRCULATION_ID_NONE  0
> +#define RECIRCULATION_ID_DUMMY 2
> +#define RECIRCULATION_ID_MIN   RECIRCULATION_ID_DUMMY
> +
> +#define RECIRCULATION_ID_MAX_LOOP 1024  /* Arbitrary value to prevent
> +                                         * endless loop */
> +
> +static uint32_t recirculation_id_hash(uint32_t id)
> +{
> +    return hash_words(&id, 1, 0);
> +}
> +
> +static uint32_t recirculation_id = RECIRCULATION_ID_MIN;
> +static uint32_t validated_recirculation_id = RECIRCULATION_ID_NONE;
> +
> +static uint32_t peek_recirculation_id(struct ofproto_dpif *ofproto)
> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
> +
> +    int loop = RECIRCULATION_ID_MAX_LOOP;
> +
> +    if (validated_recirculation_id == recirculation_id) {
> +        return recirculation_id;
> +    }
> +
> +    while (loop--) {
> +        if (recirculation_id < RECIRCULATION_ID_MIN)
> +            recirculation_id = RECIRCULATION_ID_MIN;
> +        /* Skip IPSEC_MARK bit it is reserved */
> +        if (recirculation_id & IPSEC_MARK) {
> +            recirculation_id++;
> +            ovs_assert(!(recirculation_id & IPSEC_MARK));

This essentially assumes the IPSEC_MARK bit to be 1. This would work for any bit, so the assert would be unnecessary:

recirculation_id += IPSEC_MARK;

> +        }
> +        if (!facet_find_by_id(ofproto, recirculation_id)) {
> +            validated_recirculation_id = recirculation_id;
> +            return recirculation_id;
> +        }
> +        recirculation_id++;
> +    }
> +
> +    VLOG_WARN_RL(&rl, "Failed to allocate recirulation id after %d attempts\n",
> +                 RECIRCULATION_ID_MAX_LOOP);
> +    return RECIRCULATION_ID_NONE;
> +}
> +
> +static uint32_t get_recirculation_id(void)
> +{
> +    ovs_assert(recirculation_id == validated_recirculation_id);
> +    return recirculation_id++;
> +}
> +
> /* Upcall handling. */
> 
> /* Flow miss batching.
> @@ -3646,6 +3743,14 @@ static bool
> flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
>                             struct flow_miss *miss, uint32_t hash)
> {
> +    /* If the packet is MPLS then recirculation may be used and
> +     * this will not be possible with facets if there are no recirculation
> +     * ids available */
> +    if (eth_type_mpls(miss->flow.dl_type) &&
> +        peek_recirculation_id(ofproto) == RECIRCULATION_ID_NONE) {
> +        return false;
> +    }
> +
>     if (!ofproto->governor) {
>         size_t n_subfacets;
> 
> @@ -3661,18 +3766,66 @@ flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
>                                         list_size(&miss->packets));
> }
> 
> +static const struct flow *
> +xlate_with_recirculate(struct ofproto_dpif *ofproto, struct rule_dpif *rule,
> +                       const struct flow *flow, struct flow *flow_storage,
> +                       const struct initial_vals *initial_vals,
> +                       const struct ofpact *ofpacts, size_t ofpacts_len,
> +                       struct ofpbuf *odp_actions,
> +                       struct dpif_flow_stats *stats, struct ofpbuf *packet)

It was a bit surprising from the name of the function that the packet is actually modified. Maybe add a comment above?

> +{
> +    struct initial_vals initial_vals_ = *initial_vals;
> +
> +    while (1) {
> +        struct action_xlate_ctx ctx;
> +
> +        ofpbuf_clear(odp_actions);
> +        action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals_,
> +                              rule, stats->tcp_flags, packet,
> +                              RECIRCULATION_ID_DUMMY);
> +        ctx.resubmit_stats = stats;
> +        xlate_actions(&ctx, ofpacts, ofpacts_len, odp_actions);
> +
> +        if (!ctx.recirculated) {
> +            break;
> +        }
> +
> +        /* Copy flow into flow_storage and update flow to point
> +         * to flow_storage if this is the first iteration of the loop */
> +        if (flow != flow_storage) {
> +            *flow_storage = *flow;
> +            flow = flow_storage;
> +        }
> +
> +        /* Update the packet */
> +        odp_execute_actions(NULL, packet, odp_actions->data,
> +                            odp_actions->size, flow_storage, NULL, NULL);
> +        ofpbuf_clear(odp_actions);
> +
> +        /* Replace the flow */
> +        flow_extract(packet, flow->skb_priority, flow->skb_mark,
> +                     &flow->tunnel, flow->in_port, flow_storage);
> +        initial_vals_.vlan_tci = flow->vlan_tci;
> +
> +        ofpacts = ofpact_end(ofpacts, ctx.ofpacts_len);
> +        ofpacts_len -= ctx.ofpacts_len;
> +    }
> +
> +    return flow;
> +}
> 
...
> @@ -4874,6 +5113,33 @@ facet_find(struct ofproto_dpif *ofproto,
>     return NULL;
> }
> 
> +/* Searches 'ofproto''s table of facets with recirculation ids
> + * for a facet whose recirculation_id is 'id'.
> + * Returns it if found, otherwise a null pointer.
> + *
> + * The returned facet might need revalidation; use facet_lookup_valid_by_id()
> + * instead if that is important. */
> +static struct facet *
> +facet_find_by_id(struct ofproto_dpif *ofproto, uint32_t id)
> +{
> +    uint32_t hash = recirculation_id_hash(id);
> +    struct facet *facet;
> +
> +    /* some values are never used */
> +    if (id == RECIRCULATION_ID_NONE || (id & IPSEC_MARK)) {
> +        return NULL;
> +    }

Maybe compute hash here instead?

> +
> +    HMAP_FOR_EACH_WITH_HASH (facet, recirculation_id_hmap_node,
> +                             hash, &ofproto->recirculation_ids) {
> +        if (facet->recirculation_id == id) {
> +            return facet;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
...
> @@ -6473,6 +6932,16 @@ execute_dec_mpls_ttl_action(struct action_xlate_ctx *ctx)
> }
> 
> static void
> +execute_recircualte_action(struct action_xlate_ctx *ctx)

s/recircualte/recirculate/

> +{
> +    if (ctx->recirculation_id == RECIRCULATION_ID_NONE) {
> +        ctx->recirculation_id = get_recirculation_id();
> +    }
> +    ctx->recirculated = true;
> +    ctx->flow.skb_mark = ctx->recirculation_id;
> +}
> +
> +static void
> xlate_output_action(struct action_xlate_ctx *ctx,
>                     uint16_t port, uint16_t max_len, bool may_packet_in)
> {
> @@ -6725,6 +7194,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>                  struct action_xlate_ctx *ctx)
> {
>     bool was_evictable = true;
> +    bool may_recirculate = false;

"may_recirculate" is not exactly what is meant here. It is more like "recirculate before any further L3+ actions", so maybe rename this as "may_xlate_l3_actions", reversing the truth value?

>     const struct ofpact *a;
> 
>     if (ctx->rule) {
> @@ -6793,18 +7263,30 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
> 
>         case OFPACT_SET_IPV4_SRC:
>             if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
> +                if (may_recirculate) {
> +                    execute_recircualte_action(ctx);
> +                    goto out;
> +                }

Repetition of this block is ugly. Simplify it with a new label (e.g., "recirculate_out:)" that would do the call to execute_recirculate_action() right above "out:", hence you could write here:

  if (may_recirculate) {
    goto recirculate_out;
  }

>                 ctx->flow.nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
>             }
>             break;
> 
>         case OFPACT_SET_IPV4_DST:
>             if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
> +                if (may_recirculate) {
> +                    execute_recircualte_action(ctx);
> +                    goto out;
> +                }
>                 ctx->flow.nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
>             }
>             break;
> 
>         case OFPACT_SET_IPV4_DSCP:
>             /* OpenFlow 1.0 only supports IPv4. */
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            }
>             if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
>                 ctx->flow.nw_tos &= ~IP_DSCP_MASK;
>                 ctx->flow.nw_tos |= ofpact_get_SET_IPV4_DSCP(a)->dscp;
> @@ -6813,12 +7295,20 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
> 
>         case OFPACT_SET_L4_SRC_PORT:
>             if (is_ip_any(&ctx->flow)) {
> +                if (may_recirculate) {
> +                    execute_recircualte_action(ctx);
> +                    goto out;
> +                }
>                 ctx->flow.tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
>             }
>             break;
> 
>         case OFPACT_SET_L4_DST_PORT:
>             if (is_ip_any(&ctx->flow)) {
> +                if (may_recirculate) {
> +                    execute_recircualte_action(ctx);
> +                    goto out;
> +                }
>                 ctx->flow.tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
>             }
>             break;
> @@ -6840,29 +7330,50 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>             break;
> 
>         case OFPACT_REG_MOVE:
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            }
>             nxm_execute_reg_move(ofpact_get_REG_MOVE(a), &ctx->flow);
>             break;
> 
>         case OFPACT_REG_LOAD:
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            }
>             nxm_execute_reg_load(ofpact_get_REG_LOAD(a), &ctx->flow);
>             break;
> 
>         case OFPACT_STACK_PUSH:
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            }
>             nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), &ctx->flow,
>                                    &ctx->stack);
>             break;
> 
>         case OFPACT_STACK_POP:
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            }
>             nxm_execute_stack_pop(ofpact_get_STACK_POP(a), &ctx->flow,
>                                   &ctx->stack);
>             break;

I guess that the above four actions could be executed on a limited set of registers even if "may_recirculate"?

> 
>         case OFPACT_PUSH_MPLS:
>             execute_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a)->ethertype);
> +            may_recirculate = false;
>             break;
> 
>         case OFPACT_POP_MPLS:
>             execute_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
> +            if (ctx->flow.dl_type == htons(ETH_TYPE_IP) ||
> +                ctx->flow.dl_type == htons(ETH_TYPE_IPV6)) {
> +                may_recirculate = true;
> +            }
>             break;
> 
>         case OFPACT_SET_MPLS_TTL:
> @@ -6878,7 +7389,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>             break;
> 
>         case OFPACT_DEC_TTL:
> -            if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            } else if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
>                 goto out;
>             }
>             break;
> @@ -6888,10 +7402,18 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>             break;
> 
>         case OFPACT_MULTIPATH:
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            }
>             multipath_execute(ofpact_get_MULTIPATH(a), &ctx->flow);
>             break;
> 
>         case OFPACT_BUNDLE:
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            }
>             ctx->ofproto->has_bundle_action = true;
>             xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
>             break;

Are these two always L3+ related actions?

> @@ -6902,6 +7424,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
> 
>         case OFPACT_LEARN:
>             ctx->has_learn = true;
> +            if (may_recirculate) {
> +                execute_recircualte_action(ctx);
> +                goto out;
> +            }
>             if (ctx->may_learn) {
>                 xlate_learn_action(ctx, ofpact_get_LEARN(a));
>             }

I guess there is no point recirculating here if may_learn == false?

> @@ -6969,6 +7495,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>     }
> 
> out:
> +    ctx->ofpacts_len = (char *)(a) - (char *)ofpacts;
>     if (ctx->rule) {
>         ctx->rule->up.evictable = was_evictable;
>     }
> @@ -6979,7 +7506,8 @@ action_xlate_ctx_init(struct action_xlate_ctx *ctx,
>                       struct ofproto_dpif *ofproto, const struct flow *flow,
>                       const struct initial_vals *initial_vals,
>                       struct rule_dpif *rule,
> -                      uint8_t tcp_flags, const struct ofpbuf *packet)
> +                      uint8_t tcp_flags, const struct ofpbuf *packet,
> +                      uint32_t recirculation_id)
> {
>     /* Flow initialization rules:
>      * - 'base_flow' must match the kernel's view of the packet at the
> @@ -7000,7 +7528,13 @@ action_xlate_ctx_init(struct action_xlate_ctx *ctx,
>      *   tunnel output action.
>      * - Tunnel 'base_flow' is completely cleared since that is what the
>      *   kernel does.  If we wish to maintain the original values an action
> -     *   needs to be generated. */
> +     *   needs to be generated.
> +     * - The recirculation_id element of flow and base flow are set to
> +     *   recirculate_id, which is the id that will be used by a recirculation
> +     *   action of one is added. It is stored in flow and base_flow for

s/of/if/

> +     *   convenience as the recirculation_id element of flow and base flow
> +     *   are otherwise unused  by action_xlate_ctx_init().
> +     */
> 

This comment is not completely accurate any more, since there is no recirculation_id in struct flow.

>     ctx->ofproto = ofproto;
>     ctx->flow = *flow;
> @@ -7014,6 +7548,7 @@ action_xlate_ctx_init(struct action_xlate_ctx *ctx,
>     ctx->resubmit_hook = NULL;
>     ctx->report_hook = NULL;
>     ctx->resubmit_stats = NULL;
> +    ctx->recirculation_id = recirculation_id;
> 
>     if (initial_vals) {
>         ctx->base_flow.vlan_tci = initial_vals->vlan_tci;
> 




More information about the dev mailing list