[ovs-dev] [PATCH] tunnels: Don't initialize unnecessary packet metadata.

Jesse Gross jesse at nicira.com
Wed Jul 1 06:19:08 UTC 2015


The addition of Geneve options to packet metadata significantly
expanded its size. It was reported that this can decrease performance
for DPDK ports by up to 25% since we need to initialize the whole
structure on each packet receive.

It is not really necessary to zero out the entire structure because
miniflow_extract() only copies the tunnel metadata when particular
fields indicate that it is valid. Therefore, as long as we zero out
these fields when the metadata is initialized and ensure that the
rest of the structure is correctly set in the presence of a tunnel,
we can avoid touching the tunnel fields on packet reception.

Reported-by: Ciara Loftus <ciara.loftus at intel.com>
Signed-off-by: Jesse Gross <jesse at nicira.com>
---
 lib/dp-packet.c               |  2 +-
 lib/dpif-netdev.c             | 21 ++++++++++-----------
 lib/netdev-vport.c            | 16 +++++++++++++---
 lib/netdev.c                  |  2 +-
 lib/odp-util.c                |  3 ++-
 lib/packets.h                 | 15 ++++++++++++---
 lib/tun-metadata.h            |  2 +-
 ofproto/ofproto-dpif-upcall.c |  1 -
 utilities/ovs-ofctl.c         |  4 ++--
 9 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 31fe9d3..098f816 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -29,7 +29,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
     b->source = source;
     b->l2_pad_size = 0;
     b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX;
-    b->md = PKT_METADATA_INITIALIZER(0);
+    pkt_metadata_init(&b->md, 0);
 }
 
 static void
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index dd6bb1f..d146546 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -235,7 +235,7 @@ enum pmd_cycles_counter_type {
 
 /* A port in a netdev-based datapath. */
 struct dp_netdev_port {
-    struct pkt_metadata md;
+    odp_port_t port_no;
     struct netdev *netdev;
     struct cmap_node node;      /* Node in dp_netdev's 'ports'. */
     struct netdev_saved_flags *sf;
@@ -1085,7 +1085,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
         }
     }
     port = xzalloc(sizeof *port);
-    port->md = PKT_METADATA_INITIALIZER(port_no);
+    port->port_no = port_no;
     port->netdev = netdev;
     port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
     port->type = xstrdup(type);
@@ -1190,7 +1190,7 @@ dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
     struct dp_netdev_port *port;
 
     CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
-        if (port->md.in_port.odp_port == port_no) {
+        if (port->port_no == port_no) {
             return port;
         }
     }
@@ -1300,8 +1300,7 @@ static void
 do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
     OVS_REQUIRES(dp->port_mutex)
 {
-    cmap_remove(&dp->ports, &port->node,
-                hash_odp_port(port->md.in_port.odp_port));
+    cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
     seq_change(dp->port_seq);
     if (netdev_is_pmd(port->netdev)) {
         int numa_id = netdev_get_numa_id(port->netdev);
@@ -1323,7 +1322,7 @@ answer_port_query(const struct dp_netdev_port *port,
 {
     dpif_port->name = xstrdup(netdev_get_name(port->netdev));
     dpif_port->type = xstrdup(port->type);
-    dpif_port->port_no = port->md.in_port.odp_port;
+    dpif_port->port_no = port->port_no;
 }
 
 static int
@@ -1450,7 +1449,7 @@ dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
         state->name = xstrdup(netdev_get_name(port->netdev));
         dpif_port->name = state->name;
         dpif_port->type = port->type;
-        dpif_port->port_no = port->md.in_port.odp_port;
+        dpif_port->port_no = port->port_no;
 
         retval = 0;
     } else {
@@ -2540,7 +2539,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
 
         /* XXX: initialize md in netdev implementation. */
         for (i = 0; i < cnt; i++) {
-            packets[i]->md = port->md;
+            pkt_metadata_init(&packets[i]->md, port->port_no);
         }
         cycles_count_start(pmd);
         dp_netdev_input(pmd, packets, cnt);
@@ -3652,12 +3651,12 @@ dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
     }
 
     /* Remove old port. */
-    cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->md.in_port.odp_port));
+    cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no));
     ovsrcu_postpone(free, old_port);
 
     /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
     new_port = xmemdup(old_port, sizeof *old_port);
-    new_port->md.in_port.odp_port = port_no;
+    new_port->port_no = port_no;
     cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));
 
     seq_change(dp->port_seq);
@@ -3688,7 +3687,7 @@ dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED,
     ovs_mutex_lock(&dp->port_mutex);
     if (get_port_by_name(dp, argv[2], &port)) {
         unixctl_command_reply_error(conn, "unknown port");
-    } else if (port->md.in_port.odp_port == ODPP_LOCAL) {
+    } else if (port->port_no == ODPP_LOCAL) {
         unixctl_command_reply_error(conn, "can't delete local port");
     } else {
         do_del_port(dp, port);
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 259d0ed..a3394dd 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -1051,6 +1051,16 @@ parse_gre_header(struct dp_packet *packet,
     return hlen;
 }
 
+static void
+pkt_metadata_init_tnl(struct pkt_metadata *md)
+{
+    memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata));
+
+    /* If 'opt_map' is zero then none of the rest of the tunnel metadata
+     * will be read, so we can skip clearing it. */
+    md->tunnel.metadata.opt_map = 0;
+}
+
 static int
 netdev_gre_pop_header(struct dp_packet *packet)
 {
@@ -1059,7 +1069,7 @@ netdev_gre_pop_header(struct dp_packet *packet)
     int hlen = sizeof(struct eth_header) +
                sizeof(struct ip_header) + 4;
 
-    memset(md, 0, sizeof *md);
+    pkt_metadata_init_tnl(md);
     if (hlen > dp_packet_size(packet)) {
         return EINVAL;
     }
@@ -1143,7 +1153,7 @@ netdev_vxlan_pop_header(struct dp_packet *packet)
     struct flow_tnl *tnl = &md->tunnel;
     struct vxlanhdr *vxh;
 
-    memset(md, 0, sizeof *md);
+    pkt_metadata_init_tnl(md);
     if (VXLAN_HLEN > dp_packet_size(packet)) {
         return EINVAL;
     }
@@ -1201,7 +1211,7 @@ netdev_geneve_pop_header(struct dp_packet *packet)
     unsigned int hlen;
     int err;
 
-    memset(md, 0, sizeof *md);
+    pkt_metadata_init_tnl(md);
     if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
         VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%u\n",
                      (unsigned int)GENEVE_BASE_HLEN, dp_packet_size(packet));
diff --git a/lib/netdev.c b/lib/netdev.c
index 42e5d8a..4819ae9 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -790,7 +790,7 @@ netdev_push_header(const struct netdev *netdev,
 
     for (i = 0; i < cnt; i++) {
         netdev->netdev_class->push_header(buffers[i], data);
-        buffers[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
+        pkt_metadata_init(&buffers[i]->md, u32_to_odp(data->out_port));
     }
 
     return 0;
diff --git a/lib/odp-util.c b/lib/odp-util.c
index c70ee0f..bce8f6b 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -1463,6 +1463,7 @@ odp_tun_key_from_attr__(const struct nlattr *attr,
 enum odp_key_fitness
 odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
 {
+    memset(tun, 0, sizeof *tun);
     return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun);
 }
 
@@ -3666,7 +3667,7 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
         1u << OVS_KEY_ATTR_SKB_MARK | 1u << OVS_KEY_ATTR_TUNNEL |
         1u << OVS_KEY_ATTR_IN_PORT;
 
-    *md = PKT_METADATA_INITIALIZER(ODPP_NONE);
+    pkt_metadata_init(md, ODPP_NONE);
 
     NL_ATTR_FOR_EACH (nla, left, key, key_len) {
         uint16_t type = nl_attr_type(nla);
diff --git a/lib/packets.h b/lib/packets.h
index c401d4e..311589c 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -35,8 +35,8 @@ struct ds;
 /* Tunnel information used in flow key and metadata. */
 struct flow_tnl {
     ovs_be64 tun_id;
-    ovs_be32 ip_src;
     ovs_be32 ip_dst;
+    ovs_be32 ip_src;
     uint16_t flags;
     uint8_t ip_tos;
     uint8_t ip_ttl;
@@ -69,8 +69,17 @@ struct pkt_metadata {
     struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. */
 };
 
-#define PKT_METADATA_INITIALIZER(PORT) \
-    (struct pkt_metadata){ .in_port.odp_port = PORT }
+static inline void
+pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
+{
+    /* It can be expensive to zero out all of the tunnel metadata. However,
+     * we can just zero out ip_dst and the rest of the data will never be
+     * looked at. */
+    memset(md, 0, offsetof(struct pkt_metadata, tunnel));
+    md->tunnel.ip_dst = 0;
+
+    md->in_port.odp_port = port;
+}
 
 bool dpid_from_string(const char *s, uint64_t *dpidp);
 
diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
index a1fbc0a..56bdf2a 100644
--- a/lib/tun-metadata.h
+++ b/lib/tun-metadata.h
@@ -43,8 +43,8 @@ struct geneve_opt;
  * (not necessarily even contiguous), and finding it requires referring to
  * 'tab'. */
 struct tun_metadata {
-    uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
     uint64_t opt_map;                        /* 1-bit for each present TLV. */
+    uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
     struct tun_table *tab;      /* Types & lengths for 'opts' and 'opt_map'. */
     uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 bytes */
 };
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 85f53e8..84a761a 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -1134,7 +1134,6 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
             memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix);
 
             if (upcall->out_tun_key) {
-                memset(&output_tunnel_key, 0, sizeof output_tunnel_key);
                 odp_tun_key_from_attr(upcall->out_tun_key,
                                       &output_tunnel_key);
             }
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index 5af1f13..6ef7070 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -2018,7 +2018,7 @@ ofctl_ofp_parse_pcap(struct ovs_cmdl_context *ctx)
         if (error) {
             break;
         }
-        packet->md = PKT_METADATA_INITIALIZER(ODPP_NONE);
+        pkt_metadata_init(&packet->md, ODPP_NONE);
         flow_extract(packet, &flow);
         if (flow.dl_type == htons(ETH_TYPE_IP)
             && flow.nw_proto == IPPROTO_TCP
@@ -3374,7 +3374,7 @@ ofctl_parse_pcap(struct ovs_cmdl_context *ctx)
             ovs_fatal(error, "%s: read failed", ctx->argv[1]);
         }
 
-        packet->md = PKT_METADATA_INITIALIZER(ODPP_NONE);
+        pkt_metadata_init(&packet->md, ODPP_NONE);
         flow_extract(packet, &flow);
         flow_print(stdout, &flow);
         putchar('\n');
-- 
2.1.4




More information about the dev mailing list