[ovs-dev] [PATCH v2] ovn-controller: fix crash after br-int.mgmt rconn disconnect
Han Zhou
zhouhan at gmail.com
Wed May 23 23:13:34 UTC 2018
ovn-controller abort was found in pinctrl_run() when debugging
an occasional test case failure of:
ovn-controller.at: Chassis external-id
Backtrace:
(gdb) bt
0 0x00007fd0f84878d7 in raise () from /lib64/libc.so.6
1 0x00007fd0f848953a in abort () from /lib64/libc.so.6
2 0x00000000004a6c9e in ovs_abort_valist (err_no=err_no at entry=0, format=format at entry=0x55d050 "%s: assertion %s failed in %s()", args=args at entry=0x7fff24390158) at lib/util.c:360
3 0x00000000004ad8b0 in vlog_abort_valist (module_=<optimized out>, message=0x55d050 "%s: assertion %s failed in %s()", args=args at entry=0x7fff24390158) at lib/vlog.c:1219
4 0x00000000004ad937 in vlog_abort (module=module at entry=0x7f6b20 <this_module>, message=message at entry=0x55d050 "%s: assertion %s failed in %s()") at lib/vlog.c:1233
5 0x00000000004a6a4e in ovs_assert_failure (where=where at entry=0x552ec3 "lib/ofp-msgs.c:1062", function=function at entry=0x553a20 <__func__.9268> "raw_instance_get", condition=condition at entry=0x552c00 "version >= info->min_version && version <= info->max_version") at lib/util.c:80
6 0x0000000000471fb4 in raw_instance_get (info=<optimized out>, version=<optimized out>) at lib/ofp-msgs.c:1062
7 0x0000000000472533 in ofpraw_put__ (raw=raw at entry=OFPRAW_NXT_SET_PACKET_IN_FORMAT, version=version at entry=255 '\377', xid=xid at entry=83886080, extra_tailroom=extra_tailroom at entry=0, buf=buf at entry=0x123b340) at lib/ofp-msgs.c:712
8 0x000000000047299c in ofpraw_alloc_xid (extra_tailroom=0, xid=83886080, version=255 '\377', raw=OFPRAW_NXT_SET_PACKET_IN_FORMAT) at lib/ofp-msgs.c:588
9 ofpraw_alloc (raw=raw at entry=OFPRAW_NXT_SET_PACKET_IN_FORMAT, version=<optimized out>, extra_tailroom=extra_tailroom at entry=0) at lib/ofp-msgs.c:579
10 0x000000000047383a in ofputil_encode_set_packet_in_format (ofp_version=<optimized out>, format=format at entry=OFPUTIL_PACKET_IN_NXT2) at lib/ofp-packet.c:70
11 0x00000000004145d3 in pinctrl_setup () at ovn/controller/pinctrl.c:134
12 pinctrl_run (ctx=ctx at entry=0x7fff243904f0, br_int=br_int at entry=0x1216a50, chassis=chassis at entry=0x1239f90, chassis_index=chassis_index at entry=0x7fff243a1c00, local_datapaths=local_datapaths at entry=0x7fff243a1c20, active_tunnels=active_tunnels at entry=0x7fff243a1c80)
at ovn/controller/pinctrl.c:1258
13 0x00000000004076ce in main (argc=6, argv=0x7fff243a3e38) at ovn/controller/ovn-controller.c:1177
Root cause:
When entering pinctrl_setup() the rconn got Broken pipe error:
2018-05-19T06:10:06.697Z|00105|stream_fd|DBG|send: Broken pipe
2018-05-19T06:10:06.697Z|00106|vconn|DBG|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: sent (Broken pipe): OFPT_GET_CONFIG_REQUEST (OF1.3) (xid=0x4):
2018-05-19T06:10:06.697Z|00107|rconn|WARN|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: connection dropped (Broken pipe)
2018-05-19T06:10:06.697Z|00109|rconn|DBG|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: entering BACKOFF
2018-05-19T06:10:06.698Z|00110|util|EMER|lib/ofp-msgs.c:1062: assertion version >= info->min_version && version <= info->max_version failed in raw_instance_get()
(The connection is closed because the test case set datapath_type to
foo, which is invalid for OVS but the test case doesn't care.)
There are two message sendings in pinctrl_setup(). The first message
sending detected the connection lost and triggered disconnect()
which set rconn status to BACKOFF. However, before sending the second
message, it calls rconn_get_version() again. When rconn is not
connected, the rconn_get_version() returns -1, which is used when
calling ofputil_encode_set_packet_in_format(), which finally triggered
the abort().
This problem exists not only in pinctrl_setup(), but many other places
in pinctrl and ofctrl modules. In those places when calling
rconn_get_version(), the connection status may not be connected, and
rconn_get_version() may return -1, but the return value is not
checked and directly used for following function calls, so potentially
ovn-controller can crash in these places. This patch fixes these
problems.
Signed-off-by: Han Zhou <hzhou8 at ebay.com>
---
Notes:
v1->v2: correct the assignment from int to enum
ovn/controller/ofctrl.c | 6 ++++++
ovn/controller/pinctrl.c | 48 ++++++++++++++++++++++++++++++++++++++----------
2 files changed, 44 insertions(+), 10 deletions(-)
diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index 349de3a..861547d 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -180,6 +180,9 @@ ofctrl_init(struct ovn_extend_table *group_table,
static void
run_S_NEW(void)
{
+ if (rconn_get_version(swconn) < 0) {
+ return;
+ }
struct ofpbuf *buf = ofpraw_alloc(OFPRAW_NXT_TLV_TABLE_REQUEST,
rconn_get_version(swconn), 0);
xid = queue_msg(buf);
@@ -804,6 +807,9 @@ add_meter_mod(const struct ofputil_meter_mod *mm, struct ovs_list *msgs)
static void
add_ct_flush_zone(uint16_t zone_id, struct ovs_list *msgs)
{
+ if (rconn_get_version(swconn) < 0) {
+ return;
+ }
struct ofpbuf *msg = ofpraw_alloc(OFPRAW_NXT_CT_FLUSH_ZONE,
rconn_get_version(swconn), 0);
struct nx_zone_id *nzi = ofpbuf_put_zeros(msg, sizeof *nzi);
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 305f206..a24ab0e 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -127,11 +127,15 @@ pinctrl_setup(void)
/* Fetch the switch configuration. The response later will allow us to
* change the miss_send_len to UINT16_MAX, so that we can enable
* asynchronous messages. */
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST,
- rconn_get_version(swconn), 0));
+ version, 0));
/* Set a packet-in format that supports userdata. */
- queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn),
+ queue_msg(ofputil_encode_set_packet_in_format(version,
OFPUTIL_PACKET_IN_NXT2));
}
@@ -139,7 +143,10 @@ static void
set_switch_config(struct rconn *swconn_,
const struct ofputil_switch_config *config)
{
- enum ofp_version version = rconn_get_version(swconn_);
+ int version = rconn_get_version(swconn_);
+ if (version < 0) {
+ return;
+ }
struct ofpbuf *request = ofputil_encode_set_config(config, version);
queue_msg(request);
}
@@ -152,9 +159,12 @@ set_actions_and_enqueue_msg(const struct dp_packet *packet,
/* Copy metadata from 'md' into the packet-out via "set_field"
* actions, then add actions from 'userdata'.
*/
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
uint64_t ofpacts_stub[4096 / 8];
struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
- enum ofp_version version = rconn_get_version(swconn);
reload_metadata(&ofpacts, md);
enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
@@ -376,7 +386,10 @@ pinctrl_handle_put_dhcp_opts(
struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
struct ofpbuf *userdata, struct ofpbuf *continuation)
{
- enum ofp_version version = rconn_get_version(swconn);
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
struct dp_packet *pkt_out_ptr = NULL;
uint32_t success = 0;
@@ -667,7 +680,10 @@ pinctrl_handle_put_dhcpv6_opts(
struct ofpbuf *userdata, struct ofpbuf *continuation OVS_UNUSED)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- enum ofp_version version = rconn_get_version(swconn);
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
struct dp_packet *pkt_out_ptr = NULL;
uint32_t success = 0;
@@ -861,7 +877,10 @@ pinctrl_handle_dns_lookup(
struct controller_ctx *ctx)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- enum ofp_version version = rconn_get_version(swconn);
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
struct dp_packet *pkt_out_ptr = NULL;
uint32_t success = 0;
@@ -1431,6 +1450,10 @@ ipv6_ra_send(struct ipv6_ra_state *ra)
if (time_msec() < ra->next_announce) {
return ra->next_announce;
}
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return ra->next_announce;
+ }
uint64_t packet_stub[128 / 8];
struct dp_packet packet;
@@ -1471,7 +1494,6 @@ ipv6_ra_send(struct ipv6_ra_state *ra)
};
match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
- enum ofp_version version = rconn_get_version(swconn);
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
queue_msg(ofputil_encode_packet_out(&po, proto));
dp_packet_uninit(&packet);
@@ -1897,6 +1919,10 @@ send_garp(struct garp_data *garp, long long int current_time)
return garp->announce_time;
}
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return garp->announce_time;
+ }
/* Compose a GARP request packet. */
uint64_t packet_stub[128 / 8];
struct dp_packet packet;
@@ -1912,7 +1938,6 @@ send_garp(struct garp_data *garp, long long int current_time)
/* Compose actions. The garp request is output on localnet ofport. */
uint64_t ofpacts_stub[4096 / 8];
struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
- enum ofp_version version = rconn_get_version(swconn);
ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport;
struct ofputil_packet_out po = {
@@ -2372,7 +2397,10 @@ pinctrl_handle_put_nd_ra_opts(
struct ofpbuf *continuation)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- enum ofp_version version = rconn_get_version(swconn);
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
struct dp_packet *pkt_out_ptr = NULL;
uint32_t success = 0;
--
2.1.0
More information about the dev
mailing list