[ovs-dev] [PATCH 4/4] User-Space MPLS actions and matches

Isaku Yamahata yamahata at valinux.co.jp
Mon Oct 15 22:23:32 UTC 2012


On Mon, Oct 15, 2012 at 02:42:27PM +0900, Simon Horman wrote:
> On Fri, Oct 12, 2012 at 01:05:05PM +0900, Simon Horman wrote:
> > On Fri, Oct 12, 2012 at 10:24:24AM +0900, Isaku Yamahata wrote:
> > > On Fri, Oct 12, 2012 at 09:04:09AM +0900, Simon Horman wrote:
> > > > On Thu, Oct 11, 2012 at 10:10:39PM +0900, Isaku Yamahata wrote:
> > > > > On Thu, Oct 11, 2012 at 11:14:40AM +0900, Simon Horman wrote:
> > > > > > diff --git a/lib/odp-util.c b/lib/odp-util.c
> > > > > > index 5a1d31c..5ffbaef 100644
> > > > > > --- a/lib/odp-util.c
> > > > > > +++ b/lib/odp-util.c
> > > > > ...
> > > > > > @@ -1911,6 +2045,33 @@ commit_vlan_action(const struct flow *flow, struct flow *base,
> > > > > >  }
> > > > > >  
> > > > > >  static void
> > > > > > +commit_mpls_action(const struct flow *flow, struct flow *base,
> > > > > > +                   struct ofpbuf *odp_actions)
> > > > > > +{
> > > > > > +    if (flow->mpls_lse == base->mpls_lse) {
> > > > > > +        return;
> > > > > > +    }
> > > > > 
> > > > > Not only label value, but also The stack depth needs to be compared. 
> > > > 
> > > > I'm a little confused.
> > > > 
> > > > This series doesn't track the stack depth as it has an implicit
> > > > limit of one label.
> > > 
> > > The following cases for example. 
> > > (I might be wrong because it depends on how flow->mpls_lse is managed.)
> > > 
> > > outer A, B inner --push_mpls(A)--> outer A, A, B inner
> > > outer A, B, C inner --pop_mpls(), set_mpls(B)--> outer A, C inner
> > > 
> > > I'm not sure if the latter case should be allowed with the scope of this
> > > patch series, though.
> > 
> > I think it should, though I haven't thought about it in depth.
> > 
> > > mpls_lse = 0 value is internally used for special meaning. I suppose 0 is
> > > also valid label value. But I'm not a MPLS expert.
> > 
> > I also think it is also a valid label.
> > It would be nice to avoid using it as a special value.
> > 
> > > Need to track mpls stack depth or depth change?
> > 
> > Thanks, I see the problem now. I'll see about modifying the
> > patch to track depth changes.
> 
> Hi Yamahata-san,
> 
> as the MPLS patch is rather large I am posting an incremental
> patch below that I believe implements the stack tracking that you suggest.
> It seems to be a good idea as illustrated by the corrections to the
> test-suite.
> 
> My intention is to roll this change into the "User-Space MPLS actions and
> matches" patch if it receives favourable review.

Basically looks good. some comments inlined.


> ----------------------------------------------------------------
> Simon Horman <horms at verge.net.au>
> 
> MPLS stack depth
> 
> * Track MPLS stack depth and use it to determine if the MPLS label should
>   be pushed, popped or modified
> * No longer use 0 as a special MPLS LSE value
> * Correct BoS check logic in parse_remaining_mpls() as
>   that function is short and otherwise modified
>   by this change.
> ---
>  lib/flow.c             |   20 ++++++++------------
>  lib/flow.h             |    3 ++-
>  lib/odp-util.c         |   19 ++++++++-----------
>  lib/packets.c          |   25 ++++++++++++++++++++++++-
>  lib/packets.h          |    2 ++
>  ofproto/ofproto-dpif.c |   39 +++++++++++++++++++++++++++++----------
>  tests/ofproto-dpif.at  |    6 +++---
>  tests/test-bundle.c    |    1 +
>  tests/test-multipath.c |    1 +
>  utilities/ovs-dpctl.c  |    2 +-
>  10 files changed, 79 insertions(+), 39 deletions(-)
> 
> diff --git a/lib/flow.c b/lib/flow.c
> index 5291e19..d91201e 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -93,13 +93,13 @@ pull_icmpv6(struct ofpbuf *packet)
>  }
>  
>  static void
> -parse_remaining_mpls(struct ofpbuf *b)
> +parse_remaining_mpls(struct ofpbuf *b, struct flow *flow)
>  {
>      /* Proceed with parsing remaining MPLS headers. */
>      struct mpls_hdr *mh;
>      while ((mh = ofpbuf_try_pull(b, sizeof *mh)) &&
> -           (mh->mpls_lse & htonl(MPLS_BOS_MASK))) {
> -        ;
> +           !(mh->mpls_lse & htonl(MPLS_BOS_MASK))) {
> +        flow->mpls_depth++;
>      }
>  }
>  
> @@ -111,6 +111,7 @@ parse_mpls(struct ofpbuf *b, struct flow *flow)
>      if (b->size >= sizeof(struct mpls_hdr) + sizeof(ovs_be16)) {
>          struct mpls_hdr *mh = ofpbuf_pull(b, sizeof *mh);
>          flow->mpls_lse = mh->mpls_lse;
> +        flow->mpls_depth++;
>      }
>  }
>  
> @@ -406,7 +407,7 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority,
>          packet->l2_5 = b.data;
>          parse_mpls(&b, flow);
>          if (!(flow->mpls_lse & htonl(MPLS_BOS_MASK))) {
> -            parse_remaining_mpls(&b);
> +            parse_remaining_mpls(&b, flow);
>          }
>          if (packet->size >= sizeof *ih &&
>              IP_VER(ih->ip_ihl_ver) == IP_VERSION) {
> @@ -580,7 +581,7 @@ flow_format(struct ds *ds, const struct flow *flow)
>                    ETH_ADDR_ARGS(flow->dl_dst),
>                    ntohs(flow->dl_type));
>  
> -    if (flow->mpls_lse) {
> +    if (flow->mpls_depth) {
>          ds_put_format(ds, ",mpls(");
>          ds_put_format(ds, "label:%"PRIu32",tc:%d,ttl:%d,bos:%d",
>                        mpls_lse_to_label(flow->mpls_lse),
> @@ -883,13 +884,8 @@ flow_set_vlan_pcp(struct flow *flow, uint8_t pcp)
>  void
>  flow_set_mpls_label(struct flow *flow, ovs_be32 label)
>  {
> -    if (label == htonl(0)) {
> -        flow->mpls_lse = htonl(0);
> -    } else {
> -        flow->mpls_lse &= ~htonl(MPLS_LABEL_MASK);
> -        flow->mpls_lse |=
> -            htonl((ntohl(label) << MPLS_LABEL_SHIFT) & MPLS_LABEL_MASK);
> -    }
> +    flow->mpls_lse &= ~htonl(MPLS_LABEL_MASK);
> +    flow->mpls_lse |= htonl((ntohl(label) << MPLS_LABEL_SHIFT) & MPLS_LABEL_MASK);
>  }
>  
>  /* Sets the MPLS TC that 'flow' matches to 'tc', which should be in the
> diff --git a/lib/flow.h b/lib/flow.h
> index dc02a89..f6d0ca2 100644
> --- a/lib/flow.h
> +++ b/lib/flow.h
> @@ -91,7 +91,8 @@ struct flow {
>      uint8_t arp_tha[6];         /* ARP/ND target hardware address. */
>      uint8_t nw_ttl;             /* IP TTL/Hop Limit. */
>      uint8_t nw_frag;            /* FLOW_FRAG_* flags. */
> -    uint8_t zeros[4];           /* Must be zero. */
> +    uint16_t mpls_depth;        /* Depth of MPLS stack. */
> +    uint8_t zeros[2];           /* Must be zero. */
>  };
>  BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
>  
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index 5ffbaef..cf8a531 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -1395,7 +1395,7 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow)
>          memcpy(arp_key->arp_tha, flow->arp_tha, ETH_ADDR_LEN);
>      }
>  
> -    if (flow->mpls_lse != htonl(0)) {
> +    if (flow->mpls_depth) {
>          struct ovs_key_mpls *mpls_key;
>  
>          mpls_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_MPLS,
> @@ -1634,14 +1634,10 @@ parse_mpls_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
>          return ODP_FIT_TOO_LITTLE;
>      }
>      mpls_lse = nl_attr_get_be32(attrs[OVS_KEY_ATTR_MPLS]);
> -    if (mpls_lse == htonl(0)) {
> -        /* Corner case for a truncated MPLS header. */
> -        return fitness;
> -    }
>  
>      /* Set mpls_lse. */
>      flow->mpls_lse = mpls_lse;
> -
> +    flow->mpls_depth++;
>  
>      fitness = check_expectations(present_attrs, out_of_range_attr,
>                                   expected_attrs, key, key_len);
> @@ -2048,14 +2044,14 @@ static void
>  commit_mpls_action(const struct flow *flow, struct flow *base,
>                     struct ofpbuf *odp_actions)
>  {
> -    if (flow->mpls_lse == base->mpls_lse) {
> +    if (flow->mpls_lse == base->mpls_lse &&
> +        flow->mpls_depth == base->mpls_depth) {
>          return;
>      }
>  
> -    if (base->mpls_lse != htonl(0) && flow->mpls_lse == htonl(0)) {
> -        nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_POP_MPLS,
> -                        flow->dl_type);
> -    } else if (base->mpls_lse == htonl(0) && flow->mpls_lse != htonl(0)) {
> +    if (flow->mpls_depth < base->mpls_depth) {
> +        nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_POP_MPLS, flow->dl_type);
> +    } else if (flow->mpls_depth > base->mpls_depth) {
>          struct ovs_action_push_mpls *mpls;
>  
>          mpls = nl_msg_put_unspec_uninit(odp_actions, OVS_ACTION_ATTR_PUSH_MPLS,

Minor nit pick. Right now only single lable push/pop is allowed, so how about
assert(diff mpsl_depth <= 1)

> @@ -2069,6 +2065,7 @@ commit_mpls_action(const struct flow *flow, struct flow *base,
>  
>      base->dl_type = flow->dl_type;
>      base->mpls_lse = flow->mpls_lse;
> +    base->mpls_depth = flow->mpls_depth;
>  }
>  
>  static void
> diff --git a/lib/packets.c b/lib/packets.c
> index 3f92b6a..e3182a1 100644
> --- a/lib/packets.c
> +++ b/lib/packets.c
> @@ -193,7 +193,8 @@ eth_push_vlan(struct ofpbuf *packet, ovs_be16 tci)
>  
>  /* Removes outermost VLAN header (if any is present) from 'packet'.
>   *
> - * 'packet->l2' must initially point to 'packet''s Ethernet header. */
> + * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header
> + * or may be NULL if there are no MPLS headers. */
>  void
>  eth_pop_vlan(struct ofpbuf *packet)
>  {
> @@ -212,6 +213,28 @@ eth_pop_vlan(struct ofpbuf *packet)
>      }
>  }
>  
> +/* Return depth of mpls stack.
> + *
> + * 'packet->l2' must initially point to 'packet''s Ethernet header. */
> +uint16_t
> +eth_mpls_depth(struct ofpbuf *packet)
> +{
> +    struct mpls_hdr *mh = packet->l2_5;
> +    uint16_t depth = 1;
> +
> +    if (!mh) {
> +        return 0;
> +    }
> +
> +    while (packet->size >= ((char *)mh - (char *)packet->data) + sizeof *mh &&
> +           !(mh->mpls_lse & htonl(MPLS_BOS_MASK))) {
> +        mh++;
> +        depth++;
> +    }
> +
> +    return depth;
> +}
> +
>  /* Set ethertype of the packet. */
>  void
>  set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type)
> diff --git a/lib/packets.h b/lib/packets.h
> index 299da8f..4a5ef2a 100644
> --- a/lib/packets.h
> +++ b/lib/packets.h
> @@ -140,6 +140,8 @@ void compose_rarp(struct ofpbuf *, const uint8_t eth_src[ETH_ADDR_LEN]);
>  void eth_push_vlan(struct ofpbuf *, ovs_be16 tci);
>  void eth_pop_vlan(struct ofpbuf *);
>  
> +uint16_t eth_mpls_depth(struct ofpbuf *packet);
> +
>  void set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type);
>  
>  const char *eth_from_hex(const char *hex, struct ofpbuf **packetp);
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index e5aa4e8..c9fb920 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -4954,6 +4954,7 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port,
>      uint16_t odp_port = ofp_port_to_odp_port(ofp_port);
>      ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci;
>      ovs_be32 flow_mpls_lse = ctx->flow.mpls_lse;
> +    uint16_t flow_mpls_depth = ctx->flow.mpls_depth;
>      uint8_t flow_nw_tos = ctx->flow.nw_tos;
>      uint16_t out_port;
>  
> @@ -4992,6 +4993,7 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port,
>      ctx->nf_output_iface = ofp_port;
>      ctx->flow.vlan_tci = flow_vlan_tci;
>      ctx->flow.mpls_lse = flow_mpls_lse;
> +    ctx->flow.mpls_depth = flow_mpls_depth;
>      ctx->flow.nw_tos = flow_nw_tos;
>  }
>  
> @@ -5133,6 +5135,7 @@ execute_controller_action(struct action_xlate_ctx *ctx, int len,
>  
>      if (packet->l2 && packet->l3) {
>          struct eth_header *eh;
> +        uint16_t mpls_depth;
>  
>          eth_pop_vlan(packet);
>          eh = packet->l2;
> @@ -5161,15 +5164,29 @@ execute_controller_action(struct action_xlate_ctx *ctx, int len,
>              eth_push_vlan(packet, ctx->flow.vlan_tci);
>          }
>  
> -        if (ctx->flow.mpls_lse) {
> -            push_mpls(packet, ctx->flow.dl_type);
> -            set_mpls_lse(packet, ctx->flow.mpls_lse);
> +        mpls_depth = eth_mpls_depth(packet);
> +        VLOG_WARN("execute_controller_action: mpls depth %p %u %u",
> +                  packet->l2_5, mpls_depth, ctx->flow.mpls_depth);
> +        if (packet->l2_5) {
> +            struct mpls_hdr *mh = packet->l2_5;
> +        VLOG_WARN("execute_controller_action: size %zu %zu",
> +                  packet->size,
> +                  ((char *)mh - (char *)packet->data) + sizeof *mh);
> +        VLOG_WARN("execute_controller_action: bos %d",
> +                  mh->mpls_lse & htonl(MPLS_BOS_MASK));
> +

Indent.

>          }
>  
> -        if (!ctx->flow.mpls_lse &&
> -            (eh->eth_type == htons(ETH_TYPE_MPLS) ||
> -             eh->eth_type == htons(ETH_TYPE_MPLS_MCAST))) {
> +        if (mpls_depth < ctx->flow.mpls_depth) {
> +            push_mpls(packet, ctx->flow.dl_type);
> +            set_mpls_lse(packet, ctx->flow.mpls_lse);
> +            VLOG_WARN("execute_controller_action: push mpls");
> +        } else if (mpls_depth > ctx->flow.mpls_depth) {
>              pop_mpls(packet, ctx->flow.dl_type);
> +            VLOG_WARN("execute_controller_action: pop mpls");
> +        } else if (mpls_depth) {
> +            set_mpls_lse(packet, ctx->flow.mpls_lse);
> +            VLOG_WARN("execute_controller_action: set");
>          }
>  
>          if (packet->l4) {
> @@ -5210,9 +5227,10 @@ compose_mpls_push_action(struct action_xlate_ctx *ctx, ovs_be16 eth_type)
>      assert(eth_type == htons(ETH_TYPE_MPLS) ||
>             eth_type == htons(ETH_TYPE_MPLS_MCAST));
>  
> -    if (ctx->base_flow.mpls_lse != htonl(0)) {
> +    if (ctx->base_flow.mpls_depth) {
>          ctx->flow.mpls_lse = ctx->base_flow.mpls_lse;
>          ctx->flow.mpls_lse &= ~htonl(MPLS_BOS_MASK);
> +        ctx->flow.mpls_depth = 1;
           ctx->flow.mpls_depth++
>      } else {
>          ovs_be32 mpls_label, mpls_tc, mpls_ttl, mpls_bos;
>  
> @@ -5232,6 +5250,7 @@ compose_mpls_push_action(struct action_xlate_ctx *ctx, ovs_be16 eth_type)
>          mpls_bos = htonl(0x1 << MPLS_BOS_SHIFT);
>          ctx->flow.mpls_lse = mpls_label | mpls_tc | mpls_ttl | mpls_bos;
>          ctx->flow.encap_dl_type = ctx->flow.dl_type;
> +        ctx->flow.mpls_depth++;
           ctx->flow.mpls_depth = 1;

>      }
>      ctx->flow.dl_type = eth_type;
>  }
> @@ -5246,11 +5265,12 @@ compose_mpls_pop_action(struct action_xlate_ctx *ctx, ovs_be16 eth_type)
>  
>      nl_msg_put_be16(ctx->odp_actions, OVS_ACTION_ATTR_POP_MPLS, eth_type);
>   
> -    if (ctx->flow.mpls_lse & htonl(MPLS_BOS_MASK)) {
> +    if (ctx->flow.mpls_depth) {
>          ctx->flow.dl_type = eth_type;
>          ctx->flow.encap_dl_type = htons(0);
> +        ctx->flow.mpls_depth--;
> +        ctx->base_flow.mpls_depth--;
>      }
> -    ctx->base_flow.mpls_lse = htonl(0);
>  }

It looks somewhat confused. Probably something like
the following pseudo code

if (mpls_depth) {
    ctx->flow.mpls_depth--;
    ctx->base_flow.mpls_depth--;
    if (!mpls_depth) {
        ctx->flow.dl_type = eth_type;
       ctx->flow.encap_dl_type = htons(0);
    }
}

>  static bool
> @@ -5640,7 +5660,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>  
>          case OFPACT_POP_MPLS:
>              compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
> -            ctx->flow.mpls_lse = htonl(0);
>              break;
>  
>          case OFPACT_DEC_TTL:
> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
> index e380c7e..d5aa0bd 100644
> --- a/tests/ofproto-dpif.at
> +++ b/tests/ofproto-dpif.at
> @@ -342,13 +342,13 @@ done
>  
>  OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit])
>  AT_CHECK([cat ofctl_monitor.log], [0], [dnl
> -NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
> +NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=60 in_port=1 (via action) data_len=60 (unbuffered)
>  priority:0,metadata:0,in_port:0000,tci(0) mac(50:55:55:55:55:55->50:54:00:00:00:07) type:8847,mpls(label:1000,tc:7,ttl:64,bos:1)
>  dnl
> -NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
> +NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=60 in_port=1 (via action) data_len=60 (unbuffered)
>  priority:0,metadata:0,in_port:0000,tci(0) mac(50:55:55:55:55:55->50:54:00:00:00:07) type:8847,mpls(label:1000,tc:7,ttl:64,bos:1)
>  dnl
> -NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
> +NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=60 in_port=1 (via action) data_len=60 (unbuffered)
>  priority:0,metadata:0,in_port:0000,tci(0) mac(50:55:55:55:55:55->50:54:00:00:00:07) type:8847,mpls(label:1000,tc:7,ttl:64,bos:1)
>  ])
>  
> diff --git a/tests/test-bundle.c b/tests/test-bundle.c
> index aa8b6f0..f5b24b4 100644
> --- a/tests/test-bundle.c
> +++ b/tests/test-bundle.c
> @@ -137,6 +137,7 @@ main(int argc, char *argv[])
>      for (i = 0; i < N_FLOWS; i++) {
>          random_bytes(&flows[i], sizeof flows[i]);
>          memset(flows[i].zeros, 0, sizeof flows[i].zeros);
> +        flows[i].mpls_depth = 0;
>          flows[i].regs[0] = OFPP_NONE;
>      }
>  
> diff --git a/tests/test-multipath.c b/tests/test-multipath.c
> index b990c13..8442bc2 100644
> --- a/tests/test-multipath.c
> +++ b/tests/test-multipath.c
> @@ -61,6 +61,7 @@ main(int argc, char *argv[])
>  
>              random_bytes(&flow, sizeof flow);
>              memset(flow.zeros, 0, sizeof flow.zeros);
> +            flow.mpls_depth = 0;
>  
>              mp.max_link = n - 1;
>              multipath_execute(&mp, &flow);
> diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
> index d254d01..d9f8925 100644
> --- a/utilities/ovs-dpctl.c
> +++ b/utilities/ovs-dpctl.c
> @@ -935,7 +935,7 @@ dpctl_normalize_actions(int argc, char *argv[])
>              printf("no vlan: ");
>          }
>  
> -        if (af->flow.mpls_lse != htonl(0)) {
> +        if (af->flow.mpls_depth) {
>              printf("mpls(label=%"PRIu32",tc=%d,ttl=%d): ",
>                     mpls_lse_to_label(af->flow.mpls_lse),
>                     mpls_lse_to_tc(af->flow.mpls_lse),
> -- 
> 1.7.10.4
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
> 

-- 
yamahata



More information about the dev mailing list