[ovs-dev] [RFC L3 3/4] userspace: add layer 3 flow and switching support

Lorand Jakab lojakab at cisco.com
Fri Oct 4 13:38:13 UTC 2013


This commit relaxes the assumption that all packets have an Ethernet
header, and adds support for layer 3 flows.  For each packet received on
the Linux kernel datapath the l2 and l3 members of struct ofpbuf are
initialized appropriately, and some functions now expect this, in order
to differentiate between layer 2 and layer 3 packets.  struct flow now
has a new 'noeth' member, because dl_src and dl_dst both being 0 does
not imply that the flow has no Ethernet header.  For layer 3 packets,
the protocol type is still stored in the dl_type member.

Switching L2->L3 and L3->L2 is also implemented, by adding the pop_eth
and push_eth actions respectively when a transition is detected.  The
push_eth action has hardcoded addresses for now, pending ARP resolution
implementation.

Signed-off-by: Lorand Jakab <lojakab at cisco.com>
---
 lib/dpif-linux.c             |    7 +++++++
 lib/dpif.c                   |    6 ++++--
 lib/flow.c                   |   38 +++++++++++++++++++++++++++++++++-----
 lib/flow.h                   |    7 ++++---
 lib/match.c                  |   11 +++++++----
 lib/nx-match.c               |    2 +-
 lib/odp-util.c               |    8 +++-----
 lib/ofp-print.c              |   14 ++++++++++----
 lib/ofp-print.h              |    3 ++-
 lib/ofp-util.c               |    2 +-
 ofproto/ofproto-dpif-xlate.c |   21 ++++++++++++++++++---
 ofproto/ofproto-dpif-xlate.h |    2 +-
 ofproto/ofproto-dpif.c       |    4 ++--
 13 files changed, 93 insertions(+), 32 deletions(-)

diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 6f75f57..f0b9d9b 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -1379,6 +1379,13 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
     upcall->key = CONST_CAST(struct nlattr *,
                              nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
     upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
+    if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
+        upcall->packet->l2 = upcall->packet->data;
+        upcall->packet->l3 = NULL;
+    } else {
+        upcall->packet->l2 = NULL;
+        upcall->packet->l3 = upcall->packet->data;
+    }
     upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
     *dp_ifindex = ovs_header->dp_ifindex;
 
diff --git a/lib/dpif.c b/lib/dpif.c
index b66e3bc..92c8cae 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1204,7 +1204,8 @@ dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf)
         char *packet;
 
         packet = ofp_packet_to_string(upcall->packet->data,
-                                      upcall->packet->size);
+                                      upcall->packet->size,
+                                      upcall->packet->l3);
 
         ds_init(&flow);
         odp_flow_key_format(upcall->key, upcall->key_len, &flow);
@@ -1406,7 +1407,8 @@ log_execute_message(struct dpif *dpif, const struct dpif_execute *execute,
         char *packet;
 
         packet = ofp_packet_to_string(execute->packet->data,
-                                      execute->packet->size);
+                                      execute->packet->size,
+                                      execute->packet->l3);
         ds_put_format(&ds, "%s: execute ", dpif_name(dpif));
         format_odp_actions(&ds, execute->actions, execute->actions_len);
         if (error) {
diff --git a/lib/flow.c b/lib/flow.c
index 0678c6f..491daec 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -40,6 +40,21 @@
 COVERAGE_DEFINE(flow_extract);
 COVERAGE_DEFINE(miniflow_malloc);
 
+static ovs_be16
+get_l3_eth_type(struct ofpbuf *packet)
+{
+    struct ip_header *ip = packet->l3;
+    int ip_ver = IP_VER(ip->ip_ihl_ver);
+    switch (ip_ver) {
+    case 4:
+        return htons(ETH_TYPE_IP);
+    case 6:
+        return htons(ETH_TYPE_IPV6);
+    default:
+        return 0;
+    }
+}
+
 static struct arp_eth_header *
 pull_arp(struct ofpbuf *packet)
 {
@@ -381,6 +396,8 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark,
 
     COVERAGE_INC(flow_extract);
 
+    ovs_assert(packet->l2 != NULL || packet->l3 != NULL);
+
     memset(flow, 0, sizeof *flow);
 
     if (tnl) {
@@ -393,11 +410,21 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark,
     flow->skb_priority = skb_priority;
     flow->pkt_mark = pkt_mark;
 
-    packet->l2   = b.data;
-    packet->l2_5 = NULL;
-    packet->l3   = NULL;
-    packet->l4   = NULL;
     packet->l7   = NULL;
+    packet->l4   = NULL;
+
+    if (packet->l3) {
+        packet->l2_5 = NULL;
+        packet->l2   = NULL;
+        flow->noeth = true;
+        /* We assume L3 packets are either IPv4 or IPv6 */
+        flow->dl_type = get_l3_eth_type(packet);
+        goto layer3;
+    }
+
+    packet->l3   = NULL;
+    packet->l2_5 = NULL;
+    packet->l2   = b.data;
 
     if (b.size < sizeof *eth) {
         return;
@@ -423,6 +450,7 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark,
 
     /* Network layer. */
     packet->l3 = b.data;
+layer3:
     if (flow->dl_type == htons(ETH_TYPE_IP)) {
         const struct ip_header *nh = pull_ip(&b);
         if (nh) {
@@ -513,7 +541,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
 void
 flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd)
 {
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
 
     fmd->tun_id = flow->tunnel.tun_id;
     fmd->tun_src = flow->tunnel.ip_src;
diff --git a/lib/flow.h b/lib/flow.h
index 4bd1504..1595e5a 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -37,7 +37,7 @@ struct ofpbuf;
 /* This sequence number should be incremented whenever anything involving flows
  * or the wildcarding of flows changes.  This will cause build assertion
  * failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 21
+#define FLOW_WC_SEQ 22
 
 #define FLOW_N_REGS 8
 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS);
@@ -86,6 +86,7 @@ union flow_in_port {
 * a 32-bit datapath port number.
 */
 struct flow {
+    bool noeth;                 /* Flow has no Ethernet header */
     struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. */
     ovs_be64 metadata;          /* OpenFlow Metadata. */
     struct in6_addr ipv6_src;   /* IPv6 source address. */
@@ -117,8 +118,8 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
 #define FLOW_U32S (sizeof(struct flow) / 4)
 
 /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
-BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 152 &&
-                  FLOW_WC_SEQ == 21);
+BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 160 &&
+                  FLOW_WC_SEQ == 22);
 
 /* Represents the metadata fields of struct flow. */
 struct flow_metadata {
diff --git a/lib/match.c b/lib/match.c
index 93f61f9..e963d79 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -81,9 +81,12 @@ match_wc_init(struct match *match, const struct flow *flow)
 
     memset(&wc->masks.metadata, 0xff, sizeof wc->masks.metadata);
     memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port);
-    memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
-    memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
-    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
+
+    if (!(flow->noeth)) {
+        memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
+        memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
+        memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
+    }
 
     if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
         memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
@@ -831,7 +834,7 @@ match_format(const struct match *match, struct ds *s, unsigned int priority)
 
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
 
     if (priority != OFP_DEFAULT_PRIORITY) {
         ds_put_format(s, "priority=%u,", priority);
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 8444ab7..497e26c 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -570,7 +570,7 @@ nx_put_raw(struct ofpbuf *b, bool oxm, const struct match *match,
     int match_len;
     int i;
 
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
 
     /* Metadata. */
     if (match->wc.masks.in_port.ofp_port) {
diff --git a/lib/odp-util.c b/lib/odp-util.c
index eda679e..10d7aba 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -3226,12 +3226,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
         eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]);
         memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN);
         memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN);
-        if (is_mask) {
-            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
-        }
-    }
-    if (!is_mask) {
+        flow->noeth = false;
         expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
+    } else {
+        flow->noeth = true;
     }
 
     /* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. */
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 6fe1cee..c29db58 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -55,13 +55,19 @@ static void ofp_print_error(struct ds *, enum ofperr);
 /* Returns a string that represents the contents of the Ethernet frame in the
  * 'len' bytes starting at 'data'.  The caller must free the returned string.*/
 char *
-ofp_packet_to_string(const void *data, size_t len)
+ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
 {
     struct ds ds = DS_EMPTY_INITIALIZER;
     struct ofpbuf buf;
     struct flow flow;
 
     ofpbuf_use_const(&buf, data, len);
+
+    if (is_layer3)
+        buf.l3 = buf.data;
+    else
+        buf.l2 = buf.data;
+
     flow_extract(&buf, 0, 0, NULL, NULL, &flow);
     flow_format(&ds, &flow);
 
@@ -157,7 +163,7 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh,
     ds_put_char(string, '\n');
 
     if (verbosity > 0) {
-        char *packet = ofp_packet_to_string(pin.packet, pin.packet_len);
+        char *packet = ofp_packet_to_string(pin.packet, pin.packet_len, false);
         ds_put_cstr(string, packet);
         free(packet);
     }
@@ -191,7 +197,7 @@ ofp_print_packet_out(struct ds *string, const struct ofp_header *oh,
     if (po.buffer_id == UINT32_MAX) {
         ds_put_format(string, " data_len=%zu", po.packet_len);
         if (verbosity > 0 && po.packet_len > 0) {
-            char *packet = ofp_packet_to_string(po.packet, po.packet_len);
+            char *packet = ofp_packet_to_string(po.packet, po.packet_len, false);
             ds_put_char(string, '\n');
             ds_put_cstr(string, packet);
             free(packet);
@@ -2694,5 +2700,5 @@ ofp_print(FILE *stream, const void *oh, size_t len, int verbosity)
 void
 ofp_print_packet(FILE *stream, const void *data, size_t len)
 {
-    print_and_free(stream, ofp_packet_to_string(data, len));
+    print_and_free(stream, ofp_packet_to_string(data, len, false));
 }
diff --git a/lib/ofp-print.h b/lib/ofp-print.h
index 825e139..15aa196 100644
--- a/lib/ofp-print.h
+++ b/lib/ofp-print.h
@@ -21,6 +21,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <stdbool.h>
 
 struct ds;
 struct ofp10_match;
@@ -39,7 +40,7 @@ void ofp10_match_print(struct ds *, const struct ofp10_match *, int verbosity);
 
 char *ofp_to_string(const void *, size_t, int verbosity);
 char *ofp10_match_to_string(const struct ofp10_match *, int verbosity);
-char *ofp_packet_to_string(const void *data, size_t len);
+char *ofp_packet_to_string(const void *data, size_t len, bool is_layer3);
 
 void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *);
 void ofp_print_version(const struct ofp_header *, struct ds *);
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 173b534..a5864d5 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -84,7 +84,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask)
 void
 ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc)
 {
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
 
     /* Initialize most of wc. */
     flow_wildcards_init_catchall(wc);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index cced7cc..d79bdf3 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -128,6 +128,7 @@ struct xport {
 
     bool may_enable;                 /* May be enabled in bonds. */
     bool is_tunnel;                  /* Is a tunnel port. */
+    bool is_layer3;                  /* Is a layer 3 port. */
 
     struct cfm *cfm;                 /* CFM handle or null. */
     struct bfd *bfd;                 /* BFD handle or null. */
@@ -379,7 +380,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
                  struct ofport_dpif *peer, int stp_port_no,
                  const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
                  enum ofputil_port_config config, bool is_tunnel,
-                 bool may_enable)
+                 bool may_enable, bool is_layer3)
 {
     struct xport *xport = xport_lookup(ofport);
     size_t i;
@@ -402,6 +403,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
     xport->stp_port_no = stp_port_no;
     xport->is_tunnel = is_tunnel;
     xport->may_enable = may_enable;
+    xport->is_layer3 = is_layer3;
     xport->odp_port = odp_port;
 
     if (xport->netdev != netdev) {
@@ -1288,7 +1290,7 @@ xlate_normal(struct xlate_ctx *ctx)
     }
 
     /* Learn source MAC. */
-    if (ctx->xin->may_learn) {
+    if (ctx->xin->may_learn && !(in_port->is_layer3)) {
         update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
     }
 
@@ -1533,6 +1535,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
     const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
     struct flow_wildcards *wc = &ctx->xout->wc;
     struct flow *flow = &ctx->xin->flow;
+    const struct xport *in_xport = get_ofp_port(ctx->xbridge, flow->in_port.ofp_port);
     ovs_be16 flow_vlan_tci;
     uint32_t flow_pkt_mark;
     uint8_t flow_nw_tos;
@@ -1541,7 +1544,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
 
     /* If 'struct flow' gets additional metadata, we'll need to zero it out
      * before traversing a patch port. */
-    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21);
+    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 22);
 
     if (!xport) {
         xlate_report(ctx, "Nonexistent output port");
@@ -1559,6 +1562,18 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
                                                  xport->xbundle);
     }
 
+    if (!(in_xport->is_layer3) && xport->is_layer3) {
+        odp_put_pop_eth_action(&ctx->xout->odp_actions);
+    }
+
+    if (flow->noeth && !(xport->is_layer3)) {
+        flow->noeth = false;
+        flow->dl_src[0] = 0x02;
+        flow->dl_dst[0] = 0x02;
+        odp_put_push_eth_action(&ctx->xout->odp_actions, flow->dl_src,
+                                flow->dl_dst, flow->dl_type);
+    }
+
     if (xport->peer) {
         const struct xport *peer = xport->peer;
         struct flow old_flow = ctx->xin->flow;
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 6403f50..2169235 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -137,7 +137,7 @@ void xlate_ofport_set(struct ofproto_dpif *, struct ofbundle *,
                       const struct bfd *, struct ofport_dpif *peer,
                       int stp_port_no, const struct ofproto_port_queue *qdscp,
                       size_t n_qdscp, enum ofputil_port_config, bool is_tunnel,
-                      bool may_enable) OVS_REQ_WRLOCK(xlate_rwlock);
+                      bool may_enable, bool is_layer3) OVS_REQ_WRLOCK(xlate_rwlock);
 void xlate_ofport_remove(struct ofport_dpif *) OVS_REQ_WRLOCK(xlate_rwlock);
 
 int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet,
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 58bdd2f..ffb3270 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -807,7 +807,7 @@ type_run(const char *type)
                                  ofport->bfd, ofport->peer, stp_port,
                                  ofport->qdscp, ofport->n_qdscp,
                                  ofport->up.pp.config, ofport->is_tunnel,
-                                 ofport->may_enable);
+                                 ofport->may_enable, ofport->is_layer3);
             }
             ovs_rwlock_unlock(&xlate_rwlock);
 
@@ -5325,7 +5325,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
 
             in_port_ = flow.in_port;
             ds_put_cstr(&result, "Packet: ");
-            s = ofp_packet_to_string(packet->data, packet->size);
+            s = ofp_packet_to_string(packet->data, packet->size, packet->l3);
             ds_put_cstr(&result, s);
             free(s);
 
-- 
1.7.7.5 (Apple Git-26)




More information about the dev mailing list