[ovs-dev] [PATCH v2] ovn-controller: fix crash after br-int.mgmt rconn disconnect

Han Zhou zhouhan at gmail.com
Wed May 23 23:13:34 UTC 2018


ovn-controller abort was found in pinctrl_run() when debugging
an occasional test case failure of:
    ovn-controller.at: Chassis external-id

Backtrace:
(gdb) bt
0  0x00007fd0f84878d7 in raise () from /lib64/libc.so.6
1  0x00007fd0f848953a in abort () from /lib64/libc.so.6
2  0x00000000004a6c9e in ovs_abort_valist (err_no=err_no at entry=0, format=format at entry=0x55d050 "%s: assertion %s failed in %s()", args=args at entry=0x7fff24390158) at lib/util.c:360
3  0x00000000004ad8b0 in vlog_abort_valist (module_=<optimized out>, message=0x55d050 "%s: assertion %s failed in %s()", args=args at entry=0x7fff24390158) at lib/vlog.c:1219
4  0x00000000004ad937 in vlog_abort (module=module at entry=0x7f6b20 <this_module>, message=message at entry=0x55d050 "%s: assertion %s failed in %s()") at lib/vlog.c:1233
5  0x00000000004a6a4e in ovs_assert_failure (where=where at entry=0x552ec3 "lib/ofp-msgs.c:1062", function=function at entry=0x553a20 <__func__.9268> "raw_instance_get", condition=condition at entry=0x552c00 "version >= info->min_version && version <= info->max_version") at lib/util.c:80
6  0x0000000000471fb4 in raw_instance_get (info=<optimized out>, version=<optimized out>) at lib/ofp-msgs.c:1062
7  0x0000000000472533 in ofpraw_put__ (raw=raw at entry=OFPRAW_NXT_SET_PACKET_IN_FORMAT, version=version at entry=255 '\377', xid=xid at entry=83886080, extra_tailroom=extra_tailroom at entry=0, buf=buf at entry=0x123b340) at lib/ofp-msgs.c:712
8  0x000000000047299c in ofpraw_alloc_xid (extra_tailroom=0, xid=83886080, version=255 '\377', raw=OFPRAW_NXT_SET_PACKET_IN_FORMAT) at lib/ofp-msgs.c:588
9  ofpraw_alloc (raw=raw at entry=OFPRAW_NXT_SET_PACKET_IN_FORMAT, version=<optimized out>, extra_tailroom=extra_tailroom at entry=0) at lib/ofp-msgs.c:579
10 0x000000000047383a in ofputil_encode_set_packet_in_format (ofp_version=<optimized out>, format=format at entry=OFPUTIL_PACKET_IN_NXT2) at lib/ofp-packet.c:70
11 0x00000000004145d3 in pinctrl_setup () at ovn/controller/pinctrl.c:134
12 pinctrl_run (ctx=ctx at entry=0x7fff243904f0, br_int=br_int at entry=0x1216a50, chassis=chassis at entry=0x1239f90, chassis_index=chassis_index at entry=0x7fff243a1c00, local_datapaths=local_datapaths at entry=0x7fff243a1c20, active_tunnels=active_tunnels at entry=0x7fff243a1c80)
    at ovn/controller/pinctrl.c:1258
13 0x00000000004076ce in main (argc=6, argv=0x7fff243a3e38) at ovn/controller/ovn-controller.c:1177

Root cause:
When entering pinctrl_setup() the rconn got Broken pipe error:

2018-05-19T06:10:06.697Z|00105|stream_fd|DBG|send: Broken pipe
2018-05-19T06:10:06.697Z|00106|vconn|DBG|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: sent (Broken pipe): OFPT_GET_CONFIG_REQUEST (OF1.3) (xid=0x4):
2018-05-19T06:10:06.697Z|00107|rconn|WARN|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: connection dropped (Broken pipe)
2018-05-19T06:10:06.697Z|00109|rconn|DBG|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: entering BACKOFF
2018-05-19T06:10:06.698Z|00110|util|EMER|lib/ofp-msgs.c:1062: assertion version >= info->min_version && version <= info->max_version failed in raw_instance_get()

(The connection is closed because the test case set datapath_type to
foo, which is invalid for OVS but the test case doesn't care.)

There are two message sendings in pinctrl_setup(). The first message
sending detected the connection lost and triggered disconnect()
which set rconn status to BACKOFF. However, before sending the second
message, it calls rconn_get_version() again. When rconn is not
connected, the rconn_get_version() returns -1, which is used when
calling ofputil_encode_set_packet_in_format(), which finally triggered
the abort().

This problem exists not only in pinctrl_setup(), but many other places
in pinctrl and ofctrl modules. In those places when calling
rconn_get_version(), the connection status may not be connected, and
rconn_get_version() may return -1, but the return value is not
checked and directly used for following function calls, so potentially
ovn-controller can crash in these places. This patch fixes these
problems.

Signed-off-by: Han Zhou <hzhou8 at ebay.com>
---

Notes:
    v1->v2: correct the assignment from int to enum

 ovn/controller/ofctrl.c  |  6 ++++++
 ovn/controller/pinctrl.c | 48 ++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index 349de3a..861547d 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -180,6 +180,9 @@ ofctrl_init(struct ovn_extend_table *group_table,
 static void
 run_S_NEW(void)
 {
+    if (rconn_get_version(swconn) < 0) {
+        return;
+    }
     struct ofpbuf *buf = ofpraw_alloc(OFPRAW_NXT_TLV_TABLE_REQUEST,
                                       rconn_get_version(swconn), 0);
     xid = queue_msg(buf);
@@ -804,6 +807,9 @@ add_meter_mod(const struct ofputil_meter_mod *mm, struct ovs_list *msgs)
 static void
 add_ct_flush_zone(uint16_t zone_id, struct ovs_list *msgs)
 {
+    if (rconn_get_version(swconn) < 0) {
+        return;
+    }
     struct ofpbuf *msg = ofpraw_alloc(OFPRAW_NXT_CT_FLUSH_ZONE,
                                       rconn_get_version(swconn), 0);
     struct nx_zone_id *nzi = ofpbuf_put_zeros(msg, sizeof *nzi);
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 305f206..a24ab0e 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -127,11 +127,15 @@ pinctrl_setup(void)
     /* Fetch the switch configuration.  The response later will allow us to
      * change the miss_send_len to UINT16_MAX, so that we can enable
      * asynchronous messages. */
+    int version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST,
-                           rconn_get_version(swconn), 0));
+                           version, 0));
 
     /* Set a packet-in format that supports userdata.  */
-    queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn),
+    queue_msg(ofputil_encode_set_packet_in_format(version,
                                                   OFPUTIL_PACKET_IN_NXT2));
 }
 
@@ -139,7 +143,10 @@ static void
 set_switch_config(struct rconn *swconn_,
                   const struct ofputil_switch_config *config)
 {
-    enum ofp_version version = rconn_get_version(swconn_);
+    int version = rconn_get_version(swconn_);
+    if (version < 0) {
+        return;
+    }
     struct ofpbuf *request = ofputil_encode_set_config(config, version);
     queue_msg(request);
 }
@@ -152,9 +159,12 @@ set_actions_and_enqueue_msg(const struct dp_packet *packet,
     /* Copy metadata from 'md' into the packet-out via "set_field"
      * actions, then add actions from 'userdata'.
      */
+    int version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     uint64_t ofpacts_stub[4096 / 8];
     struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
-    enum ofp_version version = rconn_get_version(swconn);
 
     reload_metadata(&ofpacts, md);
     enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
@@ -376,7 +386,10 @@ pinctrl_handle_put_dhcp_opts(
     struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
     struct ofpbuf *userdata, struct ofpbuf *continuation)
 {
-    enum ofp_version version = rconn_get_version(swconn);
+    int version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     struct dp_packet *pkt_out_ptr = NULL;
     uint32_t success = 0;
@@ -667,7 +680,10 @@ pinctrl_handle_put_dhcpv6_opts(
     struct ofpbuf *userdata, struct ofpbuf *continuation OVS_UNUSED)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-    enum ofp_version version = rconn_get_version(swconn);
+    int version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     struct dp_packet *pkt_out_ptr = NULL;
     uint32_t success = 0;
@@ -861,7 +877,10 @@ pinctrl_handle_dns_lookup(
     struct controller_ctx *ctx)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-    enum ofp_version version = rconn_get_version(swconn);
+    int version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     struct dp_packet *pkt_out_ptr = NULL;
     uint32_t success = 0;
@@ -1431,6 +1450,10 @@ ipv6_ra_send(struct ipv6_ra_state *ra)
     if (time_msec() < ra->next_announce) {
         return ra->next_announce;
     }
+    int version = rconn_get_version(swconn);
+    if (version < 0) {
+        return ra->next_announce;
+    }
 
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
@@ -1471,7 +1494,6 @@ ipv6_ra_send(struct ipv6_ra_state *ra)
     };
 
     match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
-    enum ofp_version version = rconn_get_version(swconn);
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     queue_msg(ofputil_encode_packet_out(&po, proto));
     dp_packet_uninit(&packet);
@@ -1897,6 +1919,10 @@ send_garp(struct garp_data *garp, long long int current_time)
         return garp->announce_time;
     }
 
+    int version = rconn_get_version(swconn);
+    if (version < 0) {
+        return garp->announce_time;
+    }
     /* Compose a GARP request packet. */
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
@@ -1912,7 +1938,6 @@ send_garp(struct garp_data *garp, long long int current_time)
     /* Compose actions.  The garp request is output on localnet ofport. */
     uint64_t ofpacts_stub[4096 / 8];
     struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
-    enum ofp_version version = rconn_get_version(swconn);
     ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport;
 
     struct ofputil_packet_out po = {
@@ -2372,7 +2397,10 @@ pinctrl_handle_put_nd_ra_opts(
     struct ofpbuf *continuation)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-    enum ofp_version version = rconn_get_version(swconn);
+    int version = rconn_get_version(swconn);
+    if (version < 0) {
+        return;
+    }
     enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
     struct dp_packet *pkt_out_ptr = NULL;
     uint32_t success = 0;
-- 
2.1.0



More information about the dev mailing list