[ovs-dev] [PATCH v6 2/3] userspace: add layer 3 flow and switching support
Lorand Jakab
lojakab at cisco.com
Mon Nov 3 21:36:30 UTC 2014
This commit relaxes the assumption that all packets have an Ethernet
header, and adds support for layer 3 flows. For each packet received on
the Linux kernel datapath the l2 and l3 members of struct ofpbuf are
initialized appropriately, and some functions now expect this (notably
flow_extract()), in order to differentiate between layer 2 and layer 3
packets. struct flow now has a new 'base_layer' member, because we
cannot assume that a flow has no Ethernet header when eth_src and
eth_dst are 0. For layer 3 packets, the protocol type is still stored
in the eth_type member.
Switching L2->L3 and L3->L2 are both implemented by adding the pop_eth
and push_eth actions respectively when a transition is detected. The
push_eth action puts 0s on both source and destination MACs. These
addresses can be modified with mod_dl_dst and mod_dl_src actions.
Added new prerequisite MFP_ETHERNET for fields MFF_ETH_SRC, MFF_ETH_DST,
MFF_VLAN_TCI, MFF_DL_VLAN, MFF_VLAN_VID and MFF_DL_VLAN_PCP.
Signed-off-by: Lorand Jakab <lojakab at cisco.com>
---
build-aux/extract-ofp-fields | 1 +
lib/bfd.c | 1 +
lib/dpif-netdev.c | 3 +-
lib/dpif-netlink.c | 8 +++
lib/dpif.c | 6 ++-
lib/flow.c | 124 +++++++++++++++++++++++++++----------------
lib/flow.h | 16 ++++--
lib/match.c | 2 +-
lib/meta-flow.c | 5 ++
lib/meta-flow.h | 13 ++---
lib/netdev-dummy.c | 1 +
lib/netdev-linux.c | 1 +
lib/nx-match.c | 2 +-
lib/odp-util.c | 34 ++++++++----
lib/odp-util.h | 2 +-
lib/ofp-print.c | 19 ++++---
lib/ofp-print.h | 3 +-
lib/ofp-util.c | 2 +-
lib/ofpbuf.h | 12 +++--
lib/packets.c | 2 +
lib/pcap-file.c | 1 +
ofproto/ofproto-dpif-xlate.c | 29 +++++++---
ofproto/ofproto-dpif-xlate.h | 2 +-
ofproto/ofproto-dpif.c | 3 +-
ofproto/ofproto.c | 1 +
tests/ofproto-dpif.at | 6 +--
tests/vlan-splinters.at | 4 +-
27 files changed, 207 insertions(+), 96 deletions(-)
diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields
index 95714ee..de053a0 100755
--- a/build-aux/extract-ofp-fields
+++ b/build-aux/extract-ofp-fields
@@ -33,6 +33,7 @@ FORMATTING = {"decimal": ("MFS_DECIMAL", 1, 8),
"TCP flags": ("MFS_TCP_FLAGS", 2, 2)}
PREREQS = {"none": "MFP_NONE",
+ "Ethernet": "MFP_ETHERNET",
"ARP": "MFP_ARP",
"VLAN VID": "MFP_VLAN_VID",
"IPv4": "MFP_IPV4",
diff --git a/lib/bfd.c b/lib/bfd.c
index 7f6bf5b..84e14d4 100644
--- a/lib/bfd.c
+++ b/lib/bfd.c
@@ -610,6 +610,7 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p,
ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL));
ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */
+ ofpbuf_set_frame(p, ofpbuf_data(p));
eth = ofpbuf_put_uninit(p, sizeof *eth);
memcpy(eth->eth_src,
eth_addr_is_zero(bfd->local_eth_src) ? eth_src
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7dde0cd..7273531 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2584,7 +2584,8 @@ dp_netdev_upcall(struct dp_netdev *dp, struct dpif_packet *packet_,
true);
packet_str = ofp_packet_to_string(ofpbuf_data(packet),
- ofpbuf_size(packet));
+ ofpbuf_size(packet),
+ ofpbuf_is_layer3_packet(packet));
odp_flow_key_format(ofpbuf_data(&key), ofpbuf_size(&key), &ds);
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index eebbf06..176851c 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1909,6 +1909,14 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
ofpbuf_set_data(&upcall->packet,
(char *)ofpbuf_data(&upcall->packet) + sizeof(struct nlattr));
ofpbuf_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
+ ofpbuf_set_frame(&upcall->packet, ofpbuf_data(&upcall->packet));
+
+ /* Set the correct layer based on the presence of OVS_KEY_ATTR_ETHERNET */
+ if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
+ ofpbuf_set_l3(&upcall->packet, NULL);
+ } else {
+ upcall->packet.l3_ofs = 0;
+ }
*dp_ifindex = ovs_header->dp_ifindex;
diff --git a/lib/dpif.c b/lib/dpif.c
index b94110c..bfcc97a 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1291,7 +1291,8 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall)
char *packet;
packet = ofp_packet_to_string(ofpbuf_data(&upcall->packet),
- ofpbuf_size(&upcall->packet));
+ ofpbuf_size(&upcall->packet),
+ ofpbuf_is_layer3_packet(&upcall->packet));
ds_init(&flow);
odp_flow_key_format(upcall->key, upcall->key_len, &flow);
@@ -1565,7 +1566,8 @@ log_execute_message(struct dpif *dpif, const struct dpif_execute *execute,
char *packet;
packet = ofp_packet_to_string(ofpbuf_data(execute->packet),
- ofpbuf_size(execute->packet));
+ ofpbuf_size(execute->packet),
+ ofpbuf_is_layer3_packet(execute->packet));
ds_put_format(&ds, "%s: %sexecute ",
dpif_name(dpif),
(subexecute ? "sub-"
diff --git a/lib/flow.c b/lib/flow.c
index 3935ea6..5587c4b 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -121,7 +121,7 @@ struct mf_ctx {
* away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are
* defined as macros. */
-#if (FLOW_WC_SEQ != 27)
+#if (FLOW_WC_SEQ != 28)
#define MINIFLOW_ASSERT(X) ovs_assert(X)
BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
"assertions enabled. Consider updating FLOW_WC_SEQ after "
@@ -326,18 +326,35 @@ invalid:
return false;
}
-/* Initializes 'flow' members from 'packet' and 'md'
+/* Returns the Ethertype for an L3 packet's IP version, or 0 if unknown. */
+static ovs_be16
+get_l3_eth_type(struct ofpbuf *packet)
+{
+ struct ip_header *ip = ofpbuf_l3(packet);
+ int ip_ver = IP_VER(ip->ip_ihl_ver);
+ switch (ip_ver) {
+ case 4:
+ return htons(ETH_TYPE_IP);
+ case 6:
+ return htons(ETH_TYPE_IPV6);
+ default:
+ return 0;
+ }
+}
+
+/* Initializes 'flow' members from 'packet' and 'md'. Expects packet->frame
+ * pointer to be equal to ofpbuf_data(packet), and packet->l3_ofs to be set to
+ * 0 for layer 3 packets.
*
- * Initializes 'packet' header l2 pointer to the start of the Ethernet
- * header, and the layer offsets as follows:
+ * Initializes the layer offsets as follows:
*
* - packet->l2_5_ofs to the start of the MPLS shim header, or UINT16_MAX
- * when there is no MPLS shim header.
+ * when there is no MPLS shim header or no Ethernet header.
*
- * - packet->l3_ofs to just past the Ethernet header, or just past the
- * vlan_header if one is present, to the first byte of the payload of the
- * Ethernet frame. UINT16_MAX if the frame is too short to contain an
- * Ethernet header.
+ * - packet->l3_ofs (if not 0) to just past the Ethernet header, or just
+ * past the vlan_header if one is present, to the first byte of the
+ * payload of the Ethernet frame. UINT16_MAX if the frame is too short to
+ * contain an Ethernet header.
*
* - packet->l4_ofs to just past the IPv4 header, if one is present and
* has at least the content used for the fields of interest for the flow,
@@ -354,6 +371,8 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
COVERAGE_INC(flow_extract);
+ ovs_assert(packet->frame == ofpbuf_data(packet));
+
miniflow_initialize(&m.mf, m.buf);
miniflow_extract(packet, md, &m.mf);
miniflow_expand(&m.mf, flow);
@@ -369,7 +388,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
size_t size = ofpbuf_size(packet);
uint32_t *values = miniflow_values(dst);
struct mf_ctx mf = { 0, values, values + FLOW_U32S };
- char *l2;
+ char *frame = NULL;
ovs_be16 dl_type;
uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;
@@ -385,40 +404,48 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
}
- /* Initialize packet's layer pointer and offsets. */
- l2 = data;
- ofpbuf_set_frame(packet, data);
+ if (packet->l3_ofs) {
+ frame = data;
+ miniflow_push_uint32(mf, base_layer, LAYER_2);
- /* Must have full Ethernet header to proceed. */
- if (OVS_UNLIKELY(size < sizeof(struct eth_header))) {
- goto out;
- } else {
- ovs_be16 vlan_tci;
+ /* Must have full Ethernet header to proceed. */
+ if (OVS_UNLIKELY(size < sizeof(struct eth_header))) {
+ goto out;
+ } else {
+ ovs_be16 vlan_tci;
+
+ /* Link layer. */
+ BUILD_ASSERT(offsetof(struct flow, dl_dst) + 6
+ == offsetof(struct flow, dl_src));
+ miniflow_push_words(mf, dl_dst, data, ETH_ADDR_LEN * 2 / 4);
+ /* dl_type, vlan_tci. */
+ vlan_tci = parse_vlan(&data, &size);
+ dl_type = parse_ethertype(&data, &size);
+ miniflow_push_be16(mf, dl_type, dl_type);
+ miniflow_push_be16(mf, vlan_tci, vlan_tci);
+ }
- /* Link layer. */
- BUILD_ASSERT(offsetof(struct flow, dl_dst) + 6
- == offsetof(struct flow, dl_src));
- miniflow_push_words(mf, dl_dst, data, ETH_ADDR_LEN * 2 / 4);
- /* dl_type, vlan_tci. */
- vlan_tci = parse_vlan(&data, &size);
- dl_type = parse_ethertype(&data, &size);
- miniflow_push_be16(mf, dl_type, dl_type);
- miniflow_push_be16(mf, vlan_tci, vlan_tci);
- }
+ /* Parse mpls. */
+ if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
+ int count;
+ const void *mpls = data;
+
+ packet->l2_5_ofs = (char *)data - frame;
+ count = parse_mpls(&data, &size);
+ miniflow_push_words(mf, mpls_lse, mpls, count);
+ }
- /* Parse mpls. */
- if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
- int count;
- const void *mpls = data;
+ /* Network layer. */
+ packet->l3_ofs = (char *)data - frame;
+ } else {
+ miniflow_push_uint32(mf, base_layer, LAYER_3);
- packet->l2_5_ofs = (char *)data - l2;
- count = parse_mpls(&data, &size);
- miniflow_push_words(mf, mpls_lse, mpls, count);
+ /* We assume L3 packets are either IPv4 or IPv6 */
+ dl_type = get_l3_eth_type(packet);
+ miniflow_push_be16(mf, dl_type, dl_type);
+ miniflow_push_be16(mf, vlan_tci, 0);
}
- /* Network layer. */
- packet->l3_ofs = (char *)data - l2;
-
nw_frag = 0;
if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
const struct ip_header *nh = data;
@@ -569,7 +596,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md,
goto out;
}
- packet->l4_ofs = (char *)data - l2;
+ packet->l4_ofs = (char *)data - frame;
miniflow_push_be32(mf, nw_frag,
BYTES_TO_BE32(nw_frag, nw_tos, nw_ttl, nw_proto));
@@ -668,7 +695,7 @@ flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc)
void
flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd)
{
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
fmd->dp_hash = flow->dp_hash;
fmd->recirc_id = flow->recirc_id;
@@ -815,7 +842,7 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc,
memset(&wc->masks, 0x0, sizeof wc->masks);
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
if (flow->tunnel.ip_dst) {
if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
@@ -839,11 +866,14 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc,
WC_MASK_FIELD(wc, recirc_id);
WC_MASK_FIELD(wc, dp_hash);
WC_MASK_FIELD(wc, in_port);
+ WC_MASK_FIELD(wc, base_layer);
- WC_MASK_FIELD(wc, dl_dst);
- WC_MASK_FIELD(wc, dl_src);
WC_MASK_FIELD(wc, dl_type);
- WC_MASK_FIELD(wc, vlan_tci);
+ if (flow->base_layer == LAYER_2) {
+ WC_MASK_FIELD(wc, dl_dst);
+ WC_MASK_FIELD(wc, dl_src);
+ WC_MASK_FIELD(wc, vlan_tci);
+ }
if (flow->dl_type == htons(ETH_TYPE_IP)) {
WC_MASK_FIELD(wc, nw_src);
@@ -910,7 +940,7 @@ uint64_t
flow_wc_map(const struct flow *flow)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
uint64_t map = (flow->tunnel.ip_dst) ? MINIFLOW_MAP(tunnel) : 0;
@@ -962,7 +992,7 @@ void
flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
@@ -1519,7 +1549,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type,
flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label));
/* Clear all L3 and L4 fields. */
- BUILD_ASSERT(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT(FLOW_WC_SEQ == 28);
memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0,
sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT);
}
diff --git a/lib/flow.h b/lib/flow.h
index 14bc414..2a5c6e9 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -38,7 +38,7 @@ struct pkt_metadata;
/* This sequence number should be incremented whenever anything involving flows
* or the wildcarding of flows changes. This will cause build assertion
* failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 27
+#define FLOW_WC_SEQ 28
/* Number of Open vSwitch extension 32-bit registers. */
#define FLOW_N_REGS 8
@@ -74,6 +74,11 @@ const char *flow_tun_flag_to_string(uint32_t flags);
/* Maximum number of supported MPLS labels. */
#define FLOW_MAX_MPLS_LABELS 3
+enum base_layer {
+ LAYER_2 = 0,
+ LAYER_3 = 1
+};
+
/*
* A flow in the network.
*
@@ -90,6 +95,10 @@ const char *flow_tun_flag_to_string(uint32_t flags);
* lower layer fields are first used to determine if the later fields need to
* be looked at. This enables better wildcarding for datapath flows.
*
+ * The starting layer is specified by 'base_layer'. When 'base_layer' is
+ * LAYER_3, dl_dst, dl_src, and vlan_tci are not used for matching. The
+ * dl_type field is still used to specify the layer 3 protocol.
+ *
* NOTE: Order of the fields is significant, any change in the order must be
* reflected in miniflow_extract()!
*/
@@ -102,6 +111,7 @@ struct flow {
uint32_t pkt_mark; /* Packet mark. */
uint32_t recirc_id; /* Must be exact match. */
union flow_in_port in_port; /* Input port.*/
+ uint32_t base_layer; /* Fields start at this layer */
/* L2, Order the same as in the Ethernet header! */
uint8_t dl_dst[ETH_ADDR_LEN]; /* Ethernet destination address. */
@@ -153,8 +163,8 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
/* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
BUILD_ASSERT_DECL(offsetof(struct flow, dp_hash) + sizeof(uint32_t)
- == sizeof(struct flow_tnl) + 176
- && FLOW_WC_SEQ == 27);
+ == sizeof(struct flow_tnl) + 180
+ && FLOW_WC_SEQ == 28);
/* Incremental points at which flow classification may be performed in
* segments.
diff --git a/lib/match.c b/lib/match.c
index 9a84546..58b45b7 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -863,7 +863,7 @@ match_format(const struct match *match, struct ds *s, int priority)
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
if (priority != OFP_DEFAULT_PRIORITY) {
ds_put_format(s, "priority=%d,", priority);
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index 7871545..de260c0 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -257,6 +257,8 @@ mf_are_prereqs_ok(const struct mf_field *mf, const struct flow *flow)
case MFP_NONE:
return true;
+ case MFP_ETHERNET:
+ return flow->base_layer == LAYER_2;
case MFP_ARP:
return (flow->dl_type == htons(ETH_TYPE_ARP) ||
flow->dl_type == htons(ETH_TYPE_RARP));
@@ -334,6 +336,9 @@ mf_mask_field_and_prereqs(const struct mf_field *mf, struct flow *mask)
case MFP_VLAN_VID:
mask->vlan_tci |= htons(VLAN_CFI);
break;
+ case MFP_ETHERNET:
+ mask->base_layer = UINT32_MAX;
+ break;
case MFP_NONE:
break;
}
diff --git a/lib/meta-flow.h b/lib/meta-flow.h
index 4607c7f..6571b67 100644
--- a/lib/meta-flow.h
+++ b/lib/meta-flow.h
@@ -552,7 +552,7 @@ enum OVS_PACKED_ENUM mf_field_id {
* Type: MAC.
* Maskable: bitwise.
* Formatting: Ethernet.
- * Prerequisites: none.
+ * Prerequisites: Ethernet.
* Access: read/write.
* NXM: NXM_OF_ETH_SRC(2) since v1.1.
* OXM: OXM_OF_ETH_SRC(4) since OF1.2 and v1.7.
@@ -572,7 +572,7 @@ enum OVS_PACKED_ENUM mf_field_id {
* Type: MAC.
* Maskable: bitwise.
* Formatting: Ethernet.
- * Prerequisites: none.
+ * Prerequisites: Ethernet.
* Access: read/write.
* NXM: NXM_OF_ETH_DST(1) since v1.1.
* OXM: OXM_OF_ETH_DST(3) since OF1.2 and v1.7.
@@ -656,7 +656,7 @@ enum OVS_PACKED_ENUM mf_field_id {
* Type: be16.
* Maskable: bitwise.
* Formatting: hexadecimal.
- * Prerequisites: none.
+ * Prerequisites: Ethernet.
* Access: read/write.
* NXM: NXM_OF_VLAN_TCI(4) since v1.1.
* OXM: none.
@@ -672,7 +672,7 @@ enum OVS_PACKED_ENUM mf_field_id {
* Type: be16 (low 12 bits).
* Maskable: no.
* Formatting: decimal.
- * Prerequisites: none.
+ * Prerequisites: Ethernet.
* Access: read/write.
* NXM: none.
* OXM: none.
@@ -690,7 +690,7 @@ enum OVS_PACKED_ENUM mf_field_id {
* Type: be16 (low 12 bits).
* Maskable: bitwise.
* Formatting: decimal.
- * Prerequisites: none.
+ * Prerequisites: Ethernet.
* Access: read/write.
* NXM: none.
* OXM: OXM_OF_VLAN_VID(6) since OF1.2 and v1.7.
@@ -706,7 +706,7 @@ enum OVS_PACKED_ENUM mf_field_id {
* Type: u8 (low 3 bits).
* Maskable: no.
* Formatting: decimal.
- * Prerequisites: none.
+ * Prerequisites: Ethernet.
* Access: read/write.
* NXM: none.
* OXM: none.
@@ -1377,6 +1377,7 @@ enum OVS_PACKED_ENUM mf_prereqs {
MFP_NONE,
/* L2 requirements. */
+ MFP_ETHERNET,
MFP_ARP,
MFP_VLAN_VID,
MFP_IPV4,
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index a2b1f2c..3ad8ee5 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -804,6 +804,7 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dpif_packet **arr,
netdev->stats.rx_bytes += ofpbuf_size(packet);
ovs_mutex_unlock(&netdev->mutex);
+ ofpbuf_set_frame(packet, ofpbuf_data(packet));
dp_packet_pad(packet);
/* This performs a (sometimes unnecessary) copy */
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index d19220b..cf0f7b2 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1053,6 +1053,7 @@ netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dpif_packet **packets,
}
dpif_packet_delete(packet);
} else {
+ ofpbuf_set_frame(buffer, ofpbuf_data(buffer));
dp_packet_pad(buffer);
dpif_packet_set_dp_hash(packet, 0);
packets[0] = packet;
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 82b472c..f36fccf 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -842,7 +842,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
int match_len;
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
/* Metadata. */
if (match->wc.masks.dp_hash) {
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 5362897..080d4ee 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -2613,7 +2613,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow,
size_t max_mpls_depth, bool recirc, bool export_mask)
{
struct ovs_key_ethernet *eth_key;
- size_t encap;
+ size_t encap = 0;
const struct flow *data = export_mask ? mask : flow;
nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority);
@@ -2635,6 +2635,10 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow,
nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port);
}
+ if (flow->base_layer == LAYER_3) {
+ goto noethernet;
+ }
+
eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET,
sizeof *eth_key);
get_ethernet_key(data, eth_key);
@@ -2650,8 +2654,6 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow,
if (flow->vlan_tci == htons(0)) {
goto unencap;
}
- } else {
- encap = 0;
}
if (ntohs(flow->dl_type) < ETH_TYPE_MIN) {
@@ -2674,6 +2676,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow,
nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type);
+noethernet:
if (flow->dl_type == htons(ETH_TYPE_IP)) {
struct ovs_key_ipv4 *ipv4_key;
@@ -3053,7 +3056,13 @@ parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
*expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE;
} else {
if (!is_mask) {
- flow->dl_type = htons(FLOW_DL_TYPE_NONE);
+ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4)) {
+ flow->dl_type = htons(ETH_TYPE_IP);
+ } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV6)) {
+ flow->dl_type = htons(ETH_TYPE_IPV6);
+ } else {
+ flow->dl_type = htons(FLOW_DL_TYPE_NONE);
+ }
} else if (ntohs(src_flow->dl_type) < ETH_TYPE_MIN) {
/* See comments in odp_flow_key_from_flow__(). */
VLOG_ERR_RL(&rl, "mask expected for non-Ethernet II frame");
@@ -3443,12 +3452,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]);
put_ethernet_key(eth_key, flow);
- if (is_mask) {
- expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
- }
- }
- if (!is_mask) {
+ flow->base_layer = LAYER_2;
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
+ } else {
+ flow->base_layer = LAYER_3;
}
/* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. */
@@ -3465,6 +3472,7 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
}
if (is_mask) {
flow->vlan_tci = htons(0xffff);
+ flow->base_layer = 0xffffffff;
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_VLAN)) {
flow->vlan_tci = nl_attr_get_be16(attrs[OVS_KEY_ATTR_VLAN]);
expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_VLAN);
@@ -3700,6 +3708,14 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base_flow,
{
struct ovs_key_ethernet key, base, mask;
+ /* If we have an L3 --> L2 flow, the push_eth action takes care of setting
+ * the appropriate MAC source and destination addresses, so there is no need
+ * to add a set action.
+ */
+ if (base_flow->base_layer == LAYER_3 && flow->base_layer == LAYER_2) {
+ return;
+ }
+
get_ethernet_key(flow, &key);
get_ethernet_key(base_flow, &base);
get_ethernet_key(&wc->masks, &mask);
diff --git a/lib/odp-util.h b/lib/odp-util.h
index b1584d6..69314bb 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -134,7 +134,7 @@ void odp_portno_names_destroy(struct hmap *portno_names);
* add another field and forget to adjust this value.
*/
#define ODPUTIL_FLOW_KEY_BYTES 512
-BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
/* A buffer with sufficient size and alignment to hold an nlattr-formatted flow
* key. An array of "struct nlattr" might not, in theory, be sufficiently
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 8dc7f06..ecd94d6 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -55,10 +55,11 @@ static void ofp_print_table_features(struct ds *,
const struct ofputil_table_features *,
const struct ofputil_table_stats *);
-/* Returns a string that represents the contents of the Ethernet frame in the
- * 'len' bytes starting at 'data'. The caller must free the returned string.*/
+/* Returns a string that represents the contents of the Ethernet frame
+ * ('is_layer3' is false) or IP packet ('is_layer3' is true) in the 'len' bytes
+ * starting at 'data'. The caller must free the returned string.*/
char *
-ofp_packet_to_string(const void *data, size_t len)
+ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
{
struct ds ds = DS_EMPTY_INITIALIZER;
const struct pkt_metadata md = PKT_METADATA_INITIALIZER(0);
@@ -67,6 +68,12 @@ ofp_packet_to_string(const void *data, size_t len)
size_t l4_size;
ofpbuf_use_const(&buf, data, len);
+ ofpbuf_set_frame(&buf, ofpbuf_data(&buf));
+
+ if (is_layer3) {
+ buf.l3_ofs = 0;
+ }
+
flow_extract(&buf, &md, &flow);
flow_format(&ds, &flow);
@@ -160,7 +167,7 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh,
ds_put_char(string, '\n');
if (verbosity > 0) {
- char *packet = ofp_packet_to_string(pin.packet, pin.packet_len);
+ char *packet = ofp_packet_to_string(pin.packet, pin.packet_len, false);
ds_put_cstr(string, packet);
free(packet);
}
@@ -194,7 +201,7 @@ ofp_print_packet_out(struct ds *string, const struct ofp_header *oh,
if (po.buffer_id == UINT32_MAX) {
ds_put_format(string, " data_len=%"PRIuSIZE, po.packet_len);
if (verbosity > 0 && po.packet_len > 0) {
- char *packet = ofp_packet_to_string(po.packet, po.packet_len);
+ char *packet = ofp_packet_to_string(po.packet, po.packet_len, false);
ds_put_char(string, '\n');
ds_put_cstr(string, packet);
free(packet);
@@ -2916,5 +2923,5 @@ ofp_print(FILE *stream, const void *oh, size_t len, int verbosity)
void
ofp_print_packet(FILE *stream, const void *data, size_t len)
{
- print_and_free(stream, ofp_packet_to_string(data, len));
+ print_and_free(stream, ofp_packet_to_string(data, len, false));
}
diff --git a/lib/ofp-print.h b/lib/ofp-print.h
index 825e139..15aa196 100644
--- a/lib/ofp-print.h
+++ b/lib/ofp-print.h
@@ -21,6 +21,7 @@
#include <stdint.h>
#include <stdio.h>
+#include <stdbool.h>
struct ds;
struct ofp10_match;
@@ -39,7 +40,7 @@ void ofp10_match_print(struct ds *, const struct ofp10_match *, int verbosity);
char *ofp_to_string(const void *, size_t, int verbosity);
char *ofp10_match_to_string(const struct ofp10_match *, int verbosity);
-char *ofp_packet_to_string(const void *data, size_t len);
+char *ofp_packet_to_string(const void *data, size_t len, bool is_layer3);
void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *);
void ofp_print_version(const struct ofp_header *, struct ds *);
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 573f38a..09ce01e 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -185,7 +185,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask)
void
ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc)
{
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
/* Initialize most of wc. */
flow_wildcards_init_catchall(wc);
diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
index 53c43fb..5fa5030 100644
--- a/lib/ofpbuf.h
+++ b/lib/ofpbuf.h
@@ -269,11 +269,12 @@ static inline bool ofpbuf_equal(const struct ofpbuf *a, const struct ofpbuf *b)
memcmp(ofpbuf_data(a), ofpbuf_data(b), ofpbuf_size(a)) == 0;
}
-/* Get the start if the Ethernet frame. 'l3_ofs' marks the end of the l2
- * headers, so return NULL if it is not set. */
+/* Get the start of the Ethernet frame. 'l3_ofs' marks the end of the l2
+ * headers, so return NULL if it is not set. A 'l3_ofs' of 0 marks a layer 3
+ * packet, so return NULL in that case too. */
static inline void * ofpbuf_l2(const struct ofpbuf *b)
{
- return (b->l3_ofs != UINT16_MAX) ? b->frame : NULL;
+ return (b->l3_ofs != UINT16_MAX && b->l3_ofs != 0) ? b->frame : NULL;
}
/* Sets the packet frame start pointer and resets all layer offsets.
@@ -355,6 +356,11 @@ static inline const void *ofpbuf_get_icmp_payload(const struct ofpbuf *b)
? (const char *)ofpbuf_l4(b) + ICMP_HEADER_LEN : NULL;
}
+static inline bool ofpbuf_is_layer3_packet(const struct ofpbuf *b)
+{
+ return (b->frame == ofpbuf_data(b)) && (b->l3_ofs == 0);
+}
+
#ifdef DPDK_NETDEV
BUILD_ASSERT_DECL(offsetof(struct ofpbuf, mbuf) == 0);
diff --git a/lib/packets.c b/lib/packets.c
index 4b5699a..d074bef 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -391,6 +391,8 @@ eth_from_hex(const char *hex, struct ofpbuf **packetp)
return "Packet data too short for Ethernet";
}
+ ofpbuf_set_frame(packet, ofpbuf_data(packet));
+
return NULL;
}
diff --git a/lib/pcap-file.c b/lib/pcap-file.c
index 191e690..682503d 100644
--- a/lib/pcap-file.c
+++ b/lib/pcap-file.c
@@ -185,6 +185,7 @@ ovs_pcap_read(FILE *file, struct ofpbuf **bufp, long long int *when)
ofpbuf_delete(buf);
return error;
}
+ ofpbuf_set_frame(buf, ofpbuf_data(buf));
*bufp = buf;
return 0;
}
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index e9bb4ff..a0b7f4c 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -161,6 +161,7 @@ struct xport {
bool may_enable; /* May be enabled in bonds. */
bool is_tunnel; /* Is a tunnel port. */
+ bool is_layer3; /* Is a layer 3 port. */
struct cfm *cfm; /* CFM handle or null. */
struct bfd *bfd; /* BFD handle or null. */
@@ -386,7 +387,7 @@ static void xlate_xport_set(struct xport *xport, odp_port_t odp_port,
const struct rstp_port *rstp_port,
enum ofputil_port_config config,
enum ofputil_port_state state, bool is_tunnel,
- bool may_enable);
+ bool may_enable, bool is_layer3);
static void xlate_xbridge_remove(struct xlate_cfg *, struct xbridge *);
static void xlate_xbundle_remove(struct xlate_cfg *, struct xbundle *);
static void xlate_xport_remove(struct xlate_cfg *, struct xport *);
@@ -526,13 +527,14 @@ xlate_xport_set(struct xport *xport, odp_port_t odp_port,
const struct bfd *bfd, int stp_port_no,
const struct rstp_port* rstp_port,
enum ofputil_port_config config, enum ofputil_port_state state,
- bool is_tunnel, bool may_enable)
+ bool is_tunnel, bool may_enable, bool is_layer3)
{
xport->config = config;
xport->state = state;
xport->stp_port_no = stp_port_no;
xport->is_tunnel = is_tunnel;
xport->may_enable = may_enable;
+ xport->is_layer3 = is_layer3;
xport->odp_port = odp_port;
if (xport->rstp_port != rstp_port) {
@@ -620,7 +622,7 @@ xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle,
xlate_xport_set(new_xport, xport->odp_port, xport->netdev, xport->cfm,
xport->bfd, xport->stp_port_no, xport->rstp_port,
xport->config, xport->state, xport->is_tunnel,
- xport->may_enable);
+ xport->may_enable, xport->is_layer3);
if (xport->peer) {
struct xport *peer = xport_lookup(new_xcfg, xport->peer->ofport);
@@ -864,7 +866,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
enum ofputil_port_config config,
enum ofputil_port_state state, bool is_tunnel,
- bool may_enable)
+ bool may_enable, bool is_layer3)
{
size_t i;
struct xport *xport;
@@ -884,7 +886,8 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
ovs_assert(xport->ofp_port == ofp_port);
xlate_xport_set(xport, odp_port, netdev, cfm, bfd, stp_port_no,
- rstp_port, config, state, is_tunnel, may_enable);
+ rstp_port, config, state, is_tunnel, may_enable,
+ is_layer3);
if (xport->peer) {
xport->peer->peer = NULL;
@@ -2127,7 +2130,7 @@ xlate_normal(struct xlate_ctx *ctx)
}
/* Learn source MAC. */
- if (ctx->xin->may_learn) {
+ if (ctx->xin->may_learn && !(in_port->is_layer3)) {
update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
}
if (ctx->xin->xcache) {
@@ -2469,6 +2472,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
struct flow_wildcards *wc = &ctx->xout->wc;
struct flow *flow = &ctx->xin->flow;
+ const struct xport *in_xport = get_ofp_port(ctx->xbridge, flow->in_port.ofp_port);
ovs_be16 flow_vlan_tci;
uint32_t flow_pkt_mark;
uint8_t flow_nw_tos;
@@ -2477,7 +2481,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
/* If 'struct flow' gets additional metadata, we'll need to zero it out
* before traversing a patch port. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
if (!xport) {
xlate_report(ctx, "Nonexistent output port");
@@ -2516,6 +2520,16 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
xport->xbundle);
}
+ if (in_xport && !in_xport->is_layer3 && xport->is_layer3) {
+ odp_put_pop_eth_action(ctx->xout->odp_actions);
+ }
+
+ if (flow->base_layer == LAYER_3 && !xport->is_layer3) {
+ flow->base_layer = LAYER_2;
+ odp_put_push_eth_action(ctx->xout->odp_actions, flow->dl_src,
+ flow->dl_dst, flow->dl_type);
+ }
+
if (xport->peer) {
const struct xport *peer = xport->peer;
struct flow old_flow = ctx->xin->flow;
@@ -4196,6 +4210,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
flow_wildcards_init_catchall(wc);
memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port);
+ memset(&wc->masks.base_layer, 0xff, sizeof wc->masks.base_layer);
memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
if (is_ip_any(flow)) {
wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 5ef20b1..7393cd7 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -173,7 +173,7 @@ void xlate_ofport_set(struct ofproto_dpif *, struct ofbundle *,
const struct ofproto_port_queue *qdscp,
size_t n_qdscp, enum ofputil_port_config,
enum ofputil_port_state, bool is_tunnel,
- bool may_enable);
+ bool may_enable, bool is_layer3);
void xlate_ofport_remove(struct ofport_dpif *);
struct ofproto_dpif * xlate_lookup_ofproto(const struct dpif_backer *,
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index d965d38..fbea19a 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -645,7 +645,7 @@ type_run(const char *type)
ofport->rstp_port, ofport->qdscp,
ofport->n_qdscp, ofport->up.pp.config,
ofport->up.pp.state, ofport->is_tunnel,
- ofport->may_enable);
+ ofport->may_enable, ofport->is_layer3);
}
xlate_txn_commit();
}
@@ -1060,6 +1060,7 @@ check_variable_length_userdata(struct dpif_backer *backer)
ofpbuf_init(&packet, ETH_HEADER_LEN);
eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
eth->eth_type = htons(0x1234);
+ ofpbuf_set_frame(&packet, ofpbuf_data(&packet));
/* Execute the actions. On older datapaths this fails with ERANGE, on
* newer datapaths it succeeds. */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 1233164..8b88417 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -3165,6 +3165,7 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh)
} else {
/* Ensure that the L3 header is 32-bit aligned. */
payload = ofpbuf_clone_data_with_headroom(po.packet, po.packet_len, 2);
+ ofpbuf_set_frame(payload, ofpbuf_data(payload));
}
/* Verify actions against packet, then send packet if successful. */
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 652a2a3..8f06c58 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -3576,15 +3576,15 @@ in_port=2 actions=output:1
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-odp_flow="in_port(1)"
-br_flow="in_port=1"
+odp_flow="in_port(1),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)"
+br_flow="in_port=1,dl_dst=00:00:00:00:00:00"
# Test command: ofproto/trace odp_flow with in_port as a name.
AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout])
AT_CHECK([tail -1 stdout], [0], [dnl
Datapath actions: 2
])
-odp_flow="in_port(1)"
+odp_flow="in_port(1),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)"
# Test command: ofproto/trace odp_flow
AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout])
AT_CHECK([tail -1 stdout], [0], [dnl
diff --git a/tests/vlan-splinters.at b/tests/vlan-splinters.at
index 883528d..0b1b3db 100644
--- a/tests/vlan-splinters.at
+++ b/tests/vlan-splinters.at
@@ -28,7 +28,7 @@ for args in '9 p2' '11 p3' '15 p4'; do
# Check that when a packet is received on $splinter_port, it is
# treated as if it had been received on p1 in the correct VLAN.
- AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($splinter_port)"],
+ AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($splinter_port),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)"],
[0], [stdout])
AT_CHECK_UNQUOTED([sed -n '/^Flow/p; /^Datapath/p' stdout], [0], [dnl
Flow: in_port=$p1,dl_vlan=$vlan,dl_vlan_pcp=0,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x05ff
@@ -37,7 +37,7 @@ Datapath actions: $access_port
# Check that when an OpenFlow action sends a packet to p1 on
# splintered VLAN $vlan, it is actually output to $splinter_port.
- AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($access_port)"],
+ AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($access_port),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)"],
[0], [stdout])
AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: $splinter_port
])
--
1.9.3 (Apple Git-50)
More information about the dev
mailing list