[ovs-dev] [PATCH 3/6] ovn: add egress loopback capability
Mickey Spiegel
mickeys.dev at gmail.com
Fri Dec 23 10:31:25 UTC 2016
This patch adds the capability to force loopback at the end of the
egress pipeline. A new flags.force_egress_loopback symbol is defined,
along with corresponding flags bits. When flags.force_egress_loopback
is set, at OFTABLE_LOG_TO_PHY, instead of the packet being sent out to
the peer patch port or out the outport, the packet is forced back to
the beginning of the ingress pipeline with inport = outport. All
other registers are cleared, as if the packet just arrived on that
inport.
This capability is needed in order to implement some of the east/west
distributed NAT flows.
Note: The existing flags.loopback allows a packet to go from the end
of the ingress pipeline to the beginning of the egress pipeline with
outport = inport, which is different.
Initially, there are no tests incorporated in this patch. This
functionality is tested in a subsequent distributed NAT flows patch.
Tests specific to egress loopback may be added once the capability
to inject a packet with one of the flags bits set is added.
Signed-off-by: Mickey Spiegel <mickeys.dev at gmail.com>
---
ovn/controller/physical.c | 38 ++++++++++++++++++++++++++++++++++----
ovn/lib/logical-fields.c | 8 ++++++++
ovn/lib/logical-fields.h | 14 ++++++++++++++
ovn/northd/ovn-northd.8.xml | 4 +++-
ovn/northd/ovn-northd.c | 2 ++
ovn/ovn-sb.xml | 2 +-
6 files changed, 62 insertions(+), 6 deletions(-)
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 3ea1290..cba1c0e 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -183,7 +183,7 @@ get_zone_ids(const struct sbrec_port_binding *binding,
}
static void
-put_local_common_flows(uint32_t dp_key, uint32_t port_key,
+put_local_common_flows(uint32_t dp_key, uint32_t port_key, ofp_port_t ofport,
bool nested_container, const struct zone_ids *zone_ids,
struct ofpbuf *ofpacts_p, struct hmap *flow_table)
{
@@ -258,6 +258,36 @@ put_local_common_flows(uint32_t dp_key, uint32_t port_key,
put_resubmit(OFTABLE_LOG_TO_PHY, ofpacts_p);
put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, &match, ofpacts_p);
+
+ /* Table 65, Priority 150.
+ * =======================
+ *
+ * Send packets with MLF_FORCE_EGRESS_LOOPBACK flag back to the
+ * ingress pipeline with inport = outport. */
+
+ match_init_catchall(&match);
+ ofpbuf_clear(ofpacts_p);
+ match_set_metadata(&match, htonll(dp_key));
+ match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
+ match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
+ MLF_FORCE_EGRESS_LOOPBACK, MLF_FORCE_EGRESS_LOOPBACK);
+
+ size_t clone_ofs = ofpacts_p->size;
+ struct ofpact_nest *clone = ofpact_put_CLONE(ofpacts_p);
+ put_load(ofport, MFF_IN_PORT, 0, 16, ofpacts_p);
+ put_load(port_key, MFF_LOG_INPORT, 0, 32, ofpacts_p);
+ put_load(0, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
+ put_load(MLF_EGRESS_LOOPBACK_OCCURRED, MFF_LOG_FLAGS, 0, 32, ofpacts_p);
+ for (int i = 0; i < MFF_N_LOG_REGS; i++) {
+ put_load(0, MFF_LOG_REG0 + i, 0, 32, ofpacts_p);
+ }
+ put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
+ clone = ofpbuf_at_assert(ofpacts_p, clone_ofs, sizeof *clone);
+ ofpacts_p->header = clone;
+ ofpact_finish_CLONE(ofpacts_p, &clone);
+
+ ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 150,
+ &match, ofpacts_p);
}
static void
@@ -320,7 +350,7 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
}
struct zone_ids binding_zones = get_zone_ids(binding, ct_zones);
- put_local_common_flows(dp_key, port_key, false, &binding_zones,
+ put_local_common_flows(dp_key, port_key, 0, false, &binding_zones,
ofpacts_p, flow_table);
match_init_catchall(&match);
@@ -489,8 +519,8 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
*/
struct zone_ids zone_ids = get_zone_ids(binding, ct_zones);
- put_local_common_flows(dp_key, port_key, nested_container, &zone_ids,
- ofpacts_p, flow_table);
+ put_local_common_flows(dp_key, port_key, ofport, nested_container,
+ &zone_ids, ofpacts_p, flow_table);
/* Table 0, Priority 150 and 100.
* ==============================
diff --git a/ovn/lib/logical-fields.c b/ovn/lib/logical-fields.c
index fa134d6..c056e41 100644
--- a/ovn/lib/logical-fields.c
+++ b/ovn/lib/logical-fields.c
@@ -96,6 +96,14 @@ ovn_init_symtab(struct shash *symtab)
MLF_FORCE_SNAT_FOR_LB_BIT);
expr_symtab_add_subfield(symtab, "flags.force_snat_for_lb", NULL,
flags_str);
+ snprintf(flags_str, sizeof flags_str, "flags[%d]",
+ MLF_FORCE_EGRESS_LOOPBACK_BIT);
+ expr_symtab_add_subfield(symtab, "flags.force_egress_loopback", NULL,
+ flags_str);
+ snprintf(flags_str, sizeof flags_str, "flags[%d]",
+ MLF_EGRESS_LOOPBACK_OCCURRED_BIT);
+ expr_symtab_add_subfield(symtab, "flags.egress_loopback_occurred", NULL,
+ flags_str);
/* Connection tracking state. */
expr_symtab_add_field(symtab, "ct_mark", MFF_CT_MARK, NULL, false);
diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h
index 696c529..87ce695 100644
--- a/ovn/lib/logical-fields.h
+++ b/ovn/lib/logical-fields.h
@@ -49,6 +49,8 @@ enum mff_log_flags_bits {
MLF_RCV_FROM_VXLAN_BIT = 1,
MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
MLF_FORCE_SNAT_FOR_LB_BIT = 3,
+ MLF_FORCE_EGRESS_LOOPBACK_BIT = 4,
+ MLF_EGRESS_LOOPBACK_OCCURRED_BIT = 5,
};
/* MFF_LOG_FLAGS_REG flag assignments */
@@ -69,6 +71,18 @@ enum mff_log_flags {
/* Indicate that a packet needs a force SNAT in the gateway router when
* load-balancing has taken place. */
MLF_FORCE_SNAT_FOR_LB = (1 << MLF_FORCE_SNAT_FOR_LB_BIT),
+
+ /* Indicate that at the end of the egress pipeline in table
+ * OFTABLE_LOG_TO_PHY, instead of being sent to the peer patch port or
+ * out the outport, the packet should be forced back to the beginning
+ * of the ingress pipeline with inport = outport. */
+ MLF_FORCE_EGRESS_LOOPBACK = (1 << MLF_FORCE_EGRESS_LOOPBACK_BIT),
+
+ /* Indicate that this packet has been recirculated using egress
+ * loopback. This allows certain checks to be bypassed, such as a
+ * logical router dropping packets whose source IP address equals
+ * one of the logical router's own IP addresses. */
+ MLF_EGRESS_LOOPBACK_OCCURRED = (1 << MLF_EGRESS_LOOPBACK_OCCURRED_BIT),
};
#endif /* ovn/lib/logical-fields.h */
diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index d9ab201..b8af946 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -905,7 +905,9 @@ output;
</li>
<li>
<code>ip4.src</code> or <code>ip6.src</code> is any IP
- address owned by the router.
+ address owned by the router, unless the packet was recirculated
+ due to egress loopback as indicated by
+ <code>flags.egress_loopback_occurred</code>.
</li>
<li>
<code>ip4.src</code> is the broadcast address of any IP network
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index b64cc0d..6779d46 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -3717,6 +3717,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_clear(&match);
ds_put_cstr(&match, "ip4.src == ");
op_put_v4_networks(&match, op, true);
+ ds_put_cstr(&match, " && flags.egress_loopback_occurred == 0");
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
ds_cstr(&match), "drop;");
@@ -3971,6 +3972,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ds_clear(&match);
ds_put_cstr(&match, "ip6.src == ");
op_put_v6_networks(&match, op);
+ ds_put_cstr(&match, " && flags.egress_loopback_occurred == 0");
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
ds_cstr(&match), "drop;");
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 43c6f11..28a8881 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -818,7 +818,7 @@
<li><code>reg0</code>...<code>reg9</code></li>
<li><code>xxreg0</code> <code>xxreg1</code></li>
<li><code>inport</code> <code>outport</code></li>
- <li><code>flags.loopback</code></li>
+ <li><code>flags.loopback</code> <code>flags.force_egress_loopback</code> <code>flags.egress_loopback_occurred</code></li>
<li><code>eth.src</code> <code>eth.dst</code> <code>eth.type</code></li>
<li><code>vlan.tci</code> <code>vlan.vid</code> <code>vlan.pcp</code> <code>vlan.present</code></li>
<li><code>ip.proto</code> <code>ip.dscp</code> <code>ip.ecn</code> <code>ip.ttl</code> <code>ip.frag</code></li>
--
1.9.1
More information about the dev
mailing list