[ovs-dev] [RFC PATCH] dpif-netdev: Support "port-forward" mode to avoid dp cache lookup

Sriharsha Basavapatna sriharsha.basavapatna at broadcom.com
Tue Apr 20 08:07:10 UTC 2021


Sometimes a port might be configured with a single flow that just
forwards packets to another port. This would be useful in configs
where the bridge is just forwarding packets between two ports (for
example, between a vhost-user port and a physical port). A flow
that matches only on the in_port and with an action that forwards
to another port would be configured, to avoid learning or matching
on packet headers.

Example:
$ ovs-ofctl add-flow br0 in_port=1,actions=output:2
$ ovs-ofctl add-flow br0 in_port=2,actions=output:1

This translates to a datapath flow with the match fields wildcarded
for the packet headers. However, the datapath processing still involves
flow cache (EMC/SMC) lookup and with a large number of flows it also
results in dpcls lookup due to cache miss. Avoiding cache lookup in
such configurations results in better performance, both in packets per
second (pps) and in cycles per packet (cpp).

This patch provides a new interface config parameter - "port-forward",
to avoid datapath cache lookup. When this is enabled, the datapath flow
is saved in the port when there is a cache hit for the initial packet.
For subsequent packets, the flow is readily found in the port structure,
thus avoiding cache and dpcls lookup.

Example:
$ ovs-vsctl add-port br0 dpdk0 \
-- set Interface dpdk0 other_config:port-forward=true

A coverage counter has also been added to track packets processed in
port-forward mode.

$ ovs-appctl coverage/show | grep datapath_port_forward_packet

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna at broadcom.com>
---
 lib/dpif-netdev.c    | 79 ++++++++++++++++++++++++++++++++++----------
 vswitchd/vswitch.xml | 11 ++++++
 2 files changed, 72 insertions(+), 18 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 251788b04..133ed7c1e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -114,6 +114,7 @@ COVERAGE_DEFINE(datapath_drop_invalid_port);
 COVERAGE_DEFINE(datapath_drop_invalid_bond);
 COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
 COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
+COVERAGE_DEFINE(datapath_port_forward_packet);
 
 /* Protects against changes to 'dp_netdevs'. */
 static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
@@ -483,6 +484,7 @@ struct dp_netdev_port {
     unsigned *txq_used;         /* Number of threads that use each tx queue. */
     struct ovs_mutex txq_used_mutex;
     bool emc_enabled;           /* If true EMC will be used. */
+    bool port_forward;
     char *type;                 /* Port type as requested by user. */
     char *rxq_affinity_list;    /* Requested affinity of rx queues. */
 };
@@ -557,6 +559,7 @@ struct dp_netdev_flow {
 
     bool dead;
     uint32_t mark;               /* Unique flow mark assigned to a flow */
+    void *port_forward_txp;
 
     /* Statistics. */
     struct dp_netdev_flow_stats stats;
@@ -610,6 +613,7 @@ struct polled_queue {
     struct dp_netdev_rxq *rxq;
     odp_port_t port_no;
     bool emc_enabled;
+    bool port_forward;
     bool rxq_enabled;
     uint64_t change_seq;
 };
@@ -628,6 +632,7 @@ struct tx_port {
     long long last_used;
     struct hmap_node node;
     long long flush_time;
+    struct dp_netdev_flow *port_forward_flow;
     struct dp_packet_batch output_pkts;
     struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
 };
@@ -840,7 +845,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                                       const struct nlattr *actions,
                                       size_t actions_len);
 static void dp_netdev_input(struct dp_netdev_pmd_thread *,
-                            struct dp_packet_batch *, odp_port_t port_no);
+                            struct dp_packet_batch *, odp_port_t port_no,
+                            bool port_forward);
 static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
                                   struct dp_packet_batch *);
 
@@ -2083,6 +2089,7 @@ port_create(const char *devname, const char *type,
     port->type = xstrdup(type);
     port->sf = NULL;
     port->emc_enabled = true;
+    port->port_forward = false;
     port->need_reconfigure = true;
     ovs_mutex_init(&port->txq_used_mutex);
 
@@ -2845,6 +2852,15 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
     if (flow->mark != INVALID_FLOW_MARK) {
         queue_netdev_flow_del(pmd, flow);
     }
+    if (flow->port_forward_txp) {
+        struct tx_port *p = flow->port_forward_txp;
+        if (p->port_forward_flow == flow) {
+            VLOG_DBG("Deleting port_forward_flow: port: %p flow: %p",
+                      p, flow);
+            p->port_forward_flow = NULL;
+            flow->port_forward_txp = NULL;
+        }
+    }
     flow->dead = true;
 
     dp_netdev_flow_unref(flow);
@@ -3664,6 +3680,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
     flow->dead = false;
     flow->batch = NULL;
     flow->mark = INVALID_FLOW_MARK;
+    flow->port_forward_txp = NULL;
     *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
     *CONST_CAST(struct flow *, &flow->flow) = match->flow;
     *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
@@ -4498,6 +4515,7 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t port_no,
     int error = 0;
     const char *affinity_list = smap_get(cfg, "pmd-rxq-affinity");
     bool emc_enabled = smap_get_bool(cfg, "emc-enable", true);
+    bool port_forward = smap_get_bool(cfg, "port-forward", false);
 
     ovs_mutex_lock(&dp->port_mutex);
     error = get_port_by_number(dp, port_no, &port);
@@ -4505,6 +4523,7 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t port_no,
         goto unlock;
     }
 
+    port->port_forward = port_forward;
     if (emc_enabled != port->emc_enabled) {
         struct dp_netdev_pmd_thread *pmd;
         struct ds ds = DS_EMPTY_INITIALIZER;
@@ -4725,7 +4744,7 @@ dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
 static int
 dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
                            struct dp_netdev_rxq *rxq,
-                           odp_port_t port_no)
+                           odp_port_t port_no, bool port_forward)
 {
     struct pmd_perf_stats *s = &pmd->perf_stats;
     struct dp_packet_batch batch;
@@ -4765,7 +4784,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
             }
         }
         /* Process packet batch. */
-        dp_netdev_input(pmd, &batch, port_no);
+        dp_netdev_input(pmd, &batch, port_no, port_forward);
 
         /* Assign processing cycles to rx queue. */
         cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
@@ -5791,7 +5810,8 @@ dpif_netdev_run(struct dpif *dpif)
 
                     if (dp_netdev_process_rxq_port(non_pmd,
                                                    &port->rxqs[i],
-                                                   port->port_no)) {
+                                                   port->port_no,
+                                                   port->port_forward)) {
                         need_to_flush = false;
                     }
                 }
@@ -5962,6 +5982,7 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
         poll_list[i].rxq = poll->rxq;
         poll_list[i].port_no = poll->rxq->port->port_no;
         poll_list[i].emc_enabled = poll->rxq->port->emc_enabled;
+        poll_list[i].port_forward = poll->rxq->port->port_forward;
         poll_list[i].rxq_enabled = netdev_rxq_enabled(poll->rxq->rx);
         poll_list[i].change_seq =
                      netdev_get_change_seq(poll->rxq->port->netdev);
@@ -6056,7 +6077,8 @@ reload:
 
             process_packets =
                 dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
-                                           poll_list[i].port_no);
+                                           poll_list[i].port_no,
+                                           poll_list[i].port_forward);
             rx_packets += process_packets;
         }
 
@@ -7064,6 +7086,13 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
 }
 
+static struct tx_port *
+pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+                           odp_port_t port_no)
+{
+    return tx_port_lookup(&pmd->send_port_cache, port_no);
+}
+
 /* Try to process all ('cnt') the 'packets' using only the datapath flow cache
  * 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the
  * miniflow is copied into 'keys' and the packet pointer is moved at the
@@ -7087,7 +7116,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
                struct packet_batch_per_flow batches[], size_t *n_batches,
                struct dp_packet_flow_map *flow_map,
                size_t *n_flows, uint8_t *index_map,
-               bool md_is_valid, odp_port_t port_no)
+               bool md_is_valid, odp_port_t port_no, bool port_forward)
 {
     struct netdev_flow_key *key = &keys[0];
     size_t n_missed = 0, n_emc_hit = 0;
@@ -7127,6 +7156,19 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
             pkt_metadata_init(&packet->md, port_no);
         }
 
+        if (OVS_UNLIKELY(port_forward)) {
+            struct tx_port *p;
+
+            tcp_flags = parse_tcp_flags(packet);
+            p = pmd_send_port_cache_lookup(pmd, port_no);
+            if (p->port_forward_flow) {
+                dp_netdev_queue_batches(packet, p->port_forward_flow,
+                                        tcp_flags, batches, n_batches);
+                COVERAGE_INC(datapath_port_forward_packet);
+                continue;
+            }
+        }
+
         if ((*recirc_depth_get() == 0) &&
             dp_packet_has_flow_mark(packet, &mark)) {
             flow = mark_to_flow_find(pmd, mark);
@@ -7170,6 +7212,13 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
                 packet_enqueue_to_flow_map(packet, flow, tcp_flags,
                                            flow_map, map_cnt++);
             }
+            if (OVS_UNLIKELY(port_forward)) {
+                struct tx_port *p = pmd_send_port_cache_lookup(pmd, port_no);
+                VLOG_DBG("Setting port_forward_flow: port: %p flow: %p",
+                         p, flow);
+                p->port_forward_flow = flow;
+                flow->port_forward_txp = p;
+            }
         } else {
             /* Exact match cache missed. Group missed packets together at
              * the beginning of the 'packets' array. */
@@ -7410,7 +7459,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 static void
 dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
                   struct dp_packet_batch *packets,
-                  bool md_is_valid, odp_port_t port_no)
+                  bool md_is_valid, odp_port_t port_no, bool port_forward)
 {
 #if !defined(__CHECKER__) && !defined(_WIN32)
     const size_t PKT_ARRAY_SIZE = dp_packet_batch_size(packets);
@@ -7431,7 +7480,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
 
     n_batches = 0;
     dfc_processing(pmd, packets, keys, missed_keys, batches, &n_batches,
-                   flow_map, &n_flows, index_map, md_is_valid, port_no);
+                   flow_map, &n_flows, index_map, md_is_valid, port_no,
+                   port_forward);
 
     if (!dp_packet_batch_is_empty(packets)) {
         /* Get ingress port from first packet's metadata. */
@@ -7472,16 +7522,16 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
 static void
 dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
                 struct dp_packet_batch *packets,
-                odp_port_t port_no)
+                odp_port_t port_no, bool port_forward)
 {
-    dp_netdev_input__(pmd, packets, false, port_no);
+    dp_netdev_input__(pmd, packets, false, port_no, port_forward);
 }
 
 static void
 dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
                       struct dp_packet_batch *packets)
 {
-    dp_netdev_input__(pmd, packets, true, 0);
+    dp_netdev_input__(pmd, packets, true, 0, false);
 }
 
 struct dp_netdev_execute_aux {
@@ -7581,13 +7631,6 @@ pmd_tnl_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
     return tx_port_lookup(&pmd->tnl_port_cache, port_no);
 }
 
-static struct tx_port *
-pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
-                           odp_port_t port_no)
-{
-    return tx_port_lookup(&pmd->send_port_cache, port_no);
-}
-
 static int
 push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
                 const struct nlattr *attr,
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 4597a215d..11ce80aff 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3369,6 +3369,17 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
           Defaults to true.
         </p>
       </column>
+
+      <column name="other_config" key="port-forward"
+              type='{"type": "boolean"}'>
+        <p>
+          Specifies if port forwarding should be used while processing
+          packets received from this interface.
+        </p>
+        <p>
+          Defaults to false.
+        </p>
+      </column>
     </group>
 
     <group title="MTU">
-- 
2.30.0.349.g30b29f044a


-- 
This electronic communication and the information and any files transmitted 
with it, or attached to it, are confidential and are intended solely for 
the use of the individual or entity to whom it is addressed and may contain 
information that is confidential, legally privileged, protected by privacy 
laws, or otherwise restricted from disclosure to anyone else. If you are 
not the intended recipient or the person responsible for delivering the 
e-mail to the intended recipient, you are hereby notified that any use, 
copying, distributing, dissemination, forwarding, printing, or copying of 
this e-mail is strictly prohibited. If you received this e-mail in error, 
please return the e-mail to the sender, delete it from your computer, and 
destroy any printed copy of it.


More information about the dev mailing list