[ovs-dev] [PATCH] ovs-vswitchd: Add --mlockall option and enable on XenServer.

Ben Pfaff blp at nicira.com
Tue Dec 1 17:26:55 UTC 2009


Thank you.  I pushed this out.

Justin Pettit <jpettit at nicira.com> writes:

> Looks like a good fix to me.
>
> --Justin
>
>
> On Nov 30, 2009, at 1:17 PM, Ben Pfaff wrote:
>
>> On XenServer 5.5 we found that running 4 simultaneous vm-import operations
>> on iSCSI caused so much disk and cache activity that (we suspect) parts of
>> ovs-vswitchd were paged out to disk and were not paged back in for over
>> 10 seconds, causing the XenServer to fall off the network and the XenCenter
>> connection to fail.
>> 
>> Locking ovs-vswitchd into memory appears to avoid this problem.  Henrik
>> reports that, with memory locking, importing 11 VMs simultaneously
>> completed successfully.
>> 
>> Bug #2344.
>> ---
>> configure.ac                                      |    1 +
>> vswitchd/ovs-vswitchd.8.in                        |   11 +++++++++++
>> vswitchd/ovs-vswitchd.c                           |   15 +++++++++++++++
>> xenserver/etc_init.d_vswitch                      |    8 ++++++--
>> xenserver/root_vswitch_scripts_sysconfig.template |    7 +++++++
>> 5 files changed, 40 insertions(+), 2 deletions(-)
>> 
>> diff --git a/configure.ac b/configure.ac
>> index edfe0f7..239239e 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -47,6 +47,7 @@ OVS_CHECK_LINUX_VT_H
>> OVS_CHECK_PCRE
>> OVS_CHECK_IF_PACKET
>> OVS_CHECK_STRTOK_R
>> +AC_CHECK_FUNCS([mlockall])
>> 
>> if $build_userspace; then
>>     OVS_CHECK_PKIDIR
>> diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
>> index 5c8d6c7..e9c11f4 100644
>> --- a/vswitchd/ovs-vswitchd.8.in
>> +++ b/vswitchd/ovs-vswitchd.8.in
>> @@ -67,6 +67,17 @@ Open vSwitch distribution for instructions on how to build and load
>> the Open vSwitch kernel module.
>> .PP
>> .SH OPTIONS
>> +.IP "\fB--mlockall\fR"
>> +Causes \fBovs\-vswitchd\fR to call the \fBmlockall()\fR function, to
>> +attempt to lock all of its process memory into physical RAM,
>> +preventing the kernel from paging any of its memory to disk.  This
>> +helps to avoid networking interruptions due to system memory pressure.
>> +.IP
>> +Some systems do not support \fBmlockall()\fR at all, and other systems
>> +only allow privileged users, such as the superuser, to use it.
>> +\fBovs\-vswitchd\fR emits a log message if \fBmlockall()\fR is
>> +unavailable or unsuccessful.
>> +.
>> .IP "\fB--fake-proc-net\fR"
>> Causes \fBovs\-vswitchd\fR to simulate some files in \fB/proc/net/vlan\fR
>> and \fB/proc/net/bonding\fR that some legacy software expects to
>> diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
>> index 01645ad..3309c08 100644
>> --- a/vswitchd/ovs-vswitchd.c
>> +++ b/vswitchd/ovs-vswitchd.c
>> @@ -22,6 +22,9 @@
>> #include <signal.h>
>> #include <stdlib.h>
>> #include <string.h>
>> +#ifdef HAVE_MLOCKALL
>> +#include <sys/mman.h>
>> +#endif
>> 
>> #include "bridge.h"
>> #include "cfg.h"
>> @@ -148,6 +151,7 @@ parse_options(int argc, char *argv[])
>> {
>>     enum {
>>         OPT_PEER_CA_CERT = UCHAR_MAX + 1,
>> +        OPT_MLOCKALL,
>>         OPT_FAKE_PROC_NET,
>>         VLOG_OPTION_ENUMS,
>>         LEAK_CHECKER_OPTION_ENUMS
>> @@ -155,6 +159,7 @@ parse_options(int argc, char *argv[])
>>     static struct option long_options[] = {
>>         {"help",        no_argument, 0, 'h'},
>>         {"version",     no_argument, 0, 'V'},
>> +        {"mlockall",    no_argument, 0, OPT_MLOCKALL},
>>         {"fake-proc-net", no_argument, 0, OPT_FAKE_PROC_NET},
>>         DAEMON_LONG_OPTIONS,
>>         VLOG_LONG_OPTIONS,
>> @@ -186,6 +191,16 @@ parse_options(int argc, char *argv[])
>>             OVS_PRINT_VERSION(OFP_VERSION, OFP_VERSION);
>>             exit(EXIT_SUCCESS);
>> 
>> +        case OPT_MLOCKALL:
>> +#ifdef HAVE_MLOCKALL
>> +            if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
>> +                VLOG_ERR("mlockall failed: %s", strerror(errno));
>> +            }
>> +#else
>> +            VLOG_ERR("mlockall not supported on this system");
>> +#endif
>> +            break;
>> +
>>         case OPT_FAKE_PROC_NET:
>>             error = proc_net_compat_init();
>>             if (error) {
>> diff --git a/xenserver/etc_init.d_vswitch b/xenserver/etc_init.d_vswitch
>> index 4050d5a..e8e04ad 100755
>> --- a/xenserver/etc_init.d_vswitch
>> +++ b/xenserver/etc_init.d_vswitch
>> @@ -34,6 +34,7 @@ VSWITCHD_CONF="${VSWITCHD_CONF:-/etc/ovs-vswitchd.conf}"
>> VSWITCHD_PIDFILE="${VSWITCHD_PIDFILE:-/var/run/ovs-vswitchd.pid}"
>> VSWITCHD_RUN_DIR="${VSWITCHD_RUN_DIR:-/var/xen/vswitch}"
>> VSWITCHD_PRIORITY="${VSWITCHD_PRIORITY:--10}"
>> +VSWITCHD_MLOCKALL="${VSWITCHD_MLOCKALL:-yes}"
>> VSWITCHD_LOGFILE="${VSWITCHD_LOGFILE:-/var/log/ovs-vswitchd.log}"
>> VSWITCHD_FILE_LOGLEVEL="${VSWITCHD_FILE_LOGLEVEL:-INFO}"
>> VSWITCHD_SYSLOG_LOGLEVEL="${VSWITCHD_SYSLOG_LOGLEVEL:-ERR}"
>> @@ -159,12 +160,15 @@ function start_vswitchd {
>>     if [ "$ENABLE_FAKE_PROC_NET" = "y" ]; then
>>         fake_proc_net_opt="--fake-proc-net"
>>     fi
>> +    if [ "$VSWITCHD_MLOCKALL" != "no" ]; then
>> +        mlockall_opt="--mlockall"
>> +    fi
>>     if [ "$daemonize" != "y" ]; then
>>         # Start in background and force a "success" message
>>         action "Starting ovs-vswitchd ($strace_opt$valgrind_opt)" true
>> -        (nice -n "$VSWITCHD_PRIORITY" $strace_opt $valgrind_opt "$vswitchd" --pidfile="$VSWITCHD_PIDFILE" --detach --no-chdir $fake_proc_net_opt -vANY:CONSOLE:EMER $syslog_opt $logfile_level_opt $logfile_file_opt $leak_opt "$VSWITCHD_CONF") &
>> +        (nice -n "$VSWITCHD_PRIORITY" $strace_opt $valgrind_opt "$vswitchd" --pidfile="$VSWITCHD_PIDFILE" --detach --no-chdir $fake_proc_net_opt -vANY:CONSOLE:EMER $syslog_opt $logfile_level_opt $logfile_file_opt $leak_opt $mlockall_opt "$VSWITCHD_CONF") &
>>     else
>> -        action "Starting ovs-vswitchd" nice -n "$VSWITCHD_PRIORITY" "$vswitchd" --pidfile="$VSWITCHD_PIDFILE" --detach --no-chdir $fake_proc_net_opt -vANY:CONSOLE:EMER $syslog_opt $logfile_level_opt $logfile_file_opt $leak_opt "$VSWITCHD_CONF"
>> +        action "Starting ovs-vswitchd" nice -n "$VSWITCHD_PRIORITY" "$vswitchd" --pidfile="$VSWITCHD_PIDFILE" --detach --no-chdir $fake_proc_net_opt -vANY:CONSOLE:EMER $syslog_opt $logfile_level_opt $logfile_file_opt $leak_opt $mlockall_opt "$VSWITCHD_CONF"
>>     fi
>> }
>> 
>> diff --git a/xenserver/root_vswitch_scripts_sysconfig.template b/xenserver/root_vswitch_scripts_sysconfig.template
>> index f848e7b..2578d11 100644
>> --- a/xenserver/root_vswitch_scripts_sysconfig.template
>> +++ b/xenserver/root_vswitch_scripts_sysconfig.template
>> @@ -43,6 +43,13 @@
>> #     processes.
>> # VSWITCHD_PRIORITY=-10
>> 
>> +# VSWITCHD_MLOCKALL: Whether to pass ovs-vswitchd the --mlockall option.
>> +#     Set to "no" to disable; any other value enables it.  The default is "yes".
>> +#     Enabling this option can avoid networking interruptions due to
>> +#     system memory pressure in extraordinary situations, such as multiple
>> +#     concurrent VM import operations.
>> +# VSWITCHD_MLOCKALL=yes
>> +
>> # VSWITCHD_LOGFILE: File to send the FILE_LOGLEVEL log messages to.
>> # VSWITCHD_LOGFILE=/var/log/ovs-vswitchd.log
>> 
>> -- 
>> 1.6.3.3
>> 
>> 
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev_openvswitch.org




More information about the dev mailing list