[ovs-dev] [branch-1.1 07/34] daemon: Avoid races on pidfile creation.

Ben Pfaff blp at nicira.com
Fri Apr 29 21:56:38 UTC 2011


Until now, if two copies of one OVS daemon started up at the same time,
then due to races in pidfile creation it was possible for both of them to
start successfully, instead of just one.  This was made worse when a
previous copy of the daemon had died abruptly, leaving a stale pidfile.

This commit implements a new pidfile creation and removal protocol that I
believe closes these races.  Now, a pidfile is asserted with "link" instead
of "rename", which prevents the race on creation, and a stale pidfile may
only be deleted by a process after it has taken a lock on it.

This may solve mysterious problems seen occasionally on vswitch restart.
I'm still puzzled by these problems, however, because I don't see anything
in our tests cases that would actually cause two copies of a daemon to
start at the same time, which as far as I can see is a necessary
precondition for the problem.
---
 lib/daemon.c         |  257 +++++++++++++++++++++++++++++++-------------------
 python/ovs/daemon.py |  183 +++++++++++++++++++++---------------
 tests/test-daemon.py |    2 +
 3 files changed, 267 insertions(+), 175 deletions(-)

diff --git a/lib/daemon.c b/lib/daemon.c
index a9cada8..aa971f2 100644
--- a/lib/daemon.c
+++ b/lib/daemon.c
@@ -61,6 +61,9 @@ static int daemonize_fd = -1;
  * it dies due to an error signal? */
 static bool monitor;
 
+static void check_already_running(void);
+static int lock_pidfile(FILE *, int command);
+
 /* Returns the file name that would be used for a pidfile if 'name' were
  * provided to set_pidfile().  The caller must free the returned string. */
 char *
@@ -139,87 +142,77 @@ daemon_set_monitor(void)
     monitor = true;
 }
 
-/* If a locked pidfile exists, issue a warning message and, unless
- * ignore_existing_pidfile() has been called, terminate the program. */
-static void
-die_if_already_running(void)
-{
-    pid_t pid;
-    if (!pidfile) {
-        return;
-    }
-    pid = read_pidfile_if_exists(pidfile);
-    if (pid > 0) {
-        if (!overwrite_pidfile) {
-            VLOG_ERR("%s: %s already running as pid %ld, aborting",
-                      get_pidfile(), program_name, (long int) pid);
-            ovs_fatal(0, "%s: already running as pid %ld",
-                      get_pidfile(), (long int) pid);
-        } else {
-            VLOG_WARN("%s: %s already running as pid %ld",
-                      get_pidfile(), program_name, (long int) pid);
-        }
-    }
-}
-
 /* If a pidfile has been configured, creates it and stores the running
  * process's pid in it.  Ensures that the pidfile will be deleted when the
  * process exits. */
 static void
 make_pidfile(void)
 {
-    if (pidfile) {
-        /* Create pidfile via temporary file, so that observers never see an
-         * empty pidfile or an unlocked pidfile. */
-        long int pid = getpid();
-        char *tmpfile;
-        int fd;
-
-        tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
-        fatal_signal_add_file_to_unlink(tmpfile);
-        fd = open(tmpfile, O_CREAT | O_WRONLY | O_TRUNC, 0666);
-        if (fd >= 0) {
-            struct flock lck;
-            lck.l_type = F_WRLCK;
-            lck.l_whence = SEEK_SET;
-            lck.l_start = 0;
-            lck.l_len = 0;
-            if (fcntl(fd, F_SETLK, &lck) != -1) {
-                char *text = xasprintf("%ld\n", pid);
-                if (write(fd, text, strlen(text)) == strlen(text)) {
-                    fatal_signal_add_file_to_unlink(pidfile);
-                    if (rename(tmpfile, pidfile) < 0) {
-                        VLOG_ERR("failed to rename \"%s\" to \"%s\": %s",
-                                 tmpfile, pidfile, strerror(errno));
-                        fatal_signal_remove_file_to_unlink(pidfile);
-                        close(fd);
-                    } else {
-                        /* Keep 'fd' open to retain the lock. */
-                        struct stat s;
-
-                        if (!fstat(fd, &s)) {
-                            pidfile_dev = s.st_dev;
-                            pidfile_ino = s.st_ino;
-                        } else {
-                            VLOG_ERR("%s: fstat failed: %s",
-                                     pidfile, strerror(errno));
-                        }
-                    }
-                } else {
-                    VLOG_ERR("%s: write failed: %s", tmpfile, strerror(errno));
-                    close(fd);
-                }
-                free(text);
-            } else {
-                VLOG_ERR("%s: fcntl failed: %s", tmpfile, strerror(errno));
-                close(fd);
+    long int pid = getpid();
+    struct stat s;
+    char *tmpfile;
+    FILE *file;
+    int error;
+
+    /* Create a temporary pidfile. */
+    tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
+    fatal_signal_add_file_to_unlink(tmpfile);
+    file = fopen(tmpfile, "w+");
+    if (!file) {
+        VLOG_FATAL("%s: create failed (%s)", tmpfile, strerror(errno));
+    }
+
+    if (fstat(fileno(file), &s) == -1) {
+        VLOG_FATAL("%s: fstat failed (%s)", tmpfile, strerror(errno));
+    }
+
+    fprintf(file, "%ld\n", pid);
+    if (fflush(file) == EOF) {
+        VLOG_FATAL("%s: write failed (%s)", tmpfile, strerror(errno));
+    }
+
+    error = lock_pidfile(file, F_SETLK);
+    if (error) {
+        VLOG_FATAL("%s: fcntl(F_SETLK) failed (%s)", tmpfile, strerror(error));
+    }
+
+    /* Rename or link it to the correct name. */
+    if (overwrite_pidfile) {
+        if (rename(tmpfile, pidfile) < 0) {
+            VLOG_FATAL("failed to rename \"%s\" to \"%s\" (%s)",
+                       tmpfile, pidfile, strerror(errno));
+        }
+    } else {
+        do {
+            error = link(tmpfile, pidfile) == -1 ? errno : 0;
+            if (error == EEXIST) {
+                check_already_running();
             }
-        } else {
-            VLOG_ERR("%s: create failed: %s", tmpfile, strerror(errno));
+        } while (error == EINTR || error == EEXIST);
+        if (error) {
+            VLOG_FATAL("failed to link \"%s\" as \"%s\" (%s)",
+                       tmpfile, pidfile, strerror(error));
+        }
+    }
+
+    /* Ensure that the pidfile will get deleted on exit. */
+    fatal_signal_add_file_to_unlink(pidfile);
+
+    /* Delete the temporary pidfile if it still exists. */
+    if (!overwrite_pidfile) {
+        error = fatal_signal_unlink_file_now(tmpfile);
+        if (error) {
+            VLOG_FATAL("%s: unlink failed (%s)", tmpfile, strerror(error));
         }
-        fatal_signal_remove_file_to_unlink(tmpfile);
-        free(tmpfile);
     }
+
+    /* Clean up.
+     *
+     * We don't close 'file' because its file descriptor must remain open to
+     * hold the lock. */
+    pidfile_dev = s.st_dev;
+    pidfile_ino = s.st_ino;
+    free(tmpfile);
     free(pidfile);
     pidfile = NULL;
 }
@@ -449,8 +442,9 @@ daemonize_start(void)
         /* Running in daemon process. */
     }
 
-    die_if_already_running();
-    make_pidfile();
+    if (pidfile) {
+        make_pidfile();
+    }
 
     /* Make sure that the unixctl commands for vlog get registered in a
      * daemon, even before the first log message. */
@@ -490,12 +484,37 @@ daemon_usage(void)
         ovs_rundir(), program_name);
 }
 
+static int
+lock_pidfile__(FILE *file, int command, struct flock *lck)
+{
+    int error;
+
+    lck->l_type = F_WRLCK;
+    lck->l_whence = SEEK_SET;
+    lck->l_start = 0;
+    lck->l_len = 0;
+    lck->l_pid = 0;
+
+    do {
+        error = fcntl(fileno(file), command, lck) == -1 ? errno : 0;
+    } while (error == EINTR);
+    return error;
+}
+
+static int
+lock_pidfile(FILE *file, int command)
+{
+    struct flock lck;
+
+    return lock_pidfile__(file, command, &lck);
+}
+
 static pid_t
-read_pidfile__(const char *pidfile, bool must_exist)
+read_pidfile__(const char *pidfile, bool delete_if_stale)
 {
-    char line[128];
+    struct stat s, s2;
     struct flock lck;
-    struct stat s;
+    char line[128];
     FILE *file;
     int error;
 
@@ -510,9 +529,9 @@ read_pidfile__(const char *pidfile, bool must_exist)
         return getpid();
     }
 
-    file = fopen(pidfile, "r");
+    file = fopen(pidfile, "r+");
     if (!file) {
-        if (errno == ENOENT && !must_exist) {
+        if (errno == ENOENT && delete_if_stale) {
             return 0;
         }
         error = errno;
@@ -520,20 +539,53 @@ read_pidfile__(const char *pidfile, bool must_exist)
         goto error;
     }
 
-    lck.l_type = F_WRLCK;
-    lck.l_whence = SEEK_SET;
-    lck.l_start = 0;
-    lck.l_len = 0;
-    lck.l_pid = 0;
-    if (fcntl(fileno(file), F_GETLK, &lck)) {
-        error = errno;
+    error = lock_pidfile__(file, F_GETLK, &lck);
+    if (error) {
         VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error));
         goto error;
     }
     if (lck.l_type == F_UNLCK) {
-        error = ESRCH;
-        VLOG_WARN("%s: pid file is not locked", pidfile);
-        goto error;
+        /* pidfile exists but it isn't locked by anyone.  We need to delete it
+         * so that a new pidfile can go in its place.  But just calling
+         * unlink(pidfile) makes a nasty race: what if someone else unlinks it
+         * before we do and then replaces it by a valid pidfile?  We'd unlink
+         * their valid pidfile.  We do a little dance to avoid the race, by
+         * locking the invalid pidfile.  Only one process can have the invalid
+         * pidfile locked, and only that process has the right to unlink it. */
+        if (!delete_if_stale) {
+            error = ESRCH;
+            VLOG_WARN("%s: pid file is stale", pidfile);
+            goto error;
+        }
+
+        /* Get the lock. */
+        error = lock_pidfile(file, F_SETLK);
+        if (error) {
+            /* We lost a race with someone else doing the same thing. */
+            VLOG_WARN("%s: lost race to lock pidfile", pidfile);
+            goto error;
+        }
+
+        /* Is the file we have locked still named 'pidfile'? */
+        if (stat(pidfile, &s) || fstat(fileno(file), &s2)
+            || s.st_ino != s2.st_ino || s.st_dev != s2.st_dev) {
+            /* No.  We lost a race with someone else who got the lock before
+             * us, deleted the pidfile, and closed it (releasing the lock). */
+            error = EALREADY;
+            VLOG_WARN("%s: lost race to delete pidfile", pidfile);
+            goto error;
+        }
+
+        /* We won the right to delete the stale pidfile. */
+        if (unlink(pidfile)) {
+            error = errno;
+            VLOG_WARN("%s: failed to delete stale pidfile (%s)",
+                      pidfile, strerror(error));
+            goto error;
+        }
+        VLOG_DBG("%s: deleted stale pidfile", pidfile);
+        fclose(file);
+        return 0;
     }
 
     if (!fgets(line, sizeof line, file)) {
@@ -548,9 +600,12 @@ read_pidfile__(const char *pidfile, bool must_exist)
     }
 
     if (lck.l_pid != strtoul(line, NULL, 10)) {
+        /* The process that has the pidfile locked is not the process that
+         * created it.  It must be stale, with the process that has it locked
+         * preparing to delete it. */
         error = ESRCH;
-        VLOG_WARN("l_pid (%ld) != %s pid (%s)",
-                   (long int) lck.l_pid, pidfile, line);
+        VLOG_WARN("%s: stale pidfile for pid %s being deleted by pid %ld",
+                  pidfile, line, (long int) lck.l_pid);
         goto error;
     }
 
@@ -569,15 +624,19 @@ error:
 pid_t
 read_pidfile(const char *pidfile)
 {
-    return read_pidfile__(pidfile, true);
+    return read_pidfile__(pidfile, false);
 }
 
-
-/* Opens and reads a PID from 'pidfile', if it exists.  Returns 0 if 'pidfile'
- * doesn't exist, the positive PID if successful, otherwise a negative errno
- * value. */
-pid_t
-read_pidfile_if_exists(const char *pidfile)
+/* Checks whether a process with the given 'pidfile' is already running and,
+ * if so, aborts.  If 'pidfile' is stale, deletes it. */
+static void
+check_already_running(void)
 {
-    return read_pidfile__(pidfile, false);
+    long int pid = read_pidfile__(pidfile, true);
+    if (pid > 0) {
+        VLOG_FATAL("%s: already running as pid %ld, aborting", pidfile, pid);
+    } else if (pid < 0) {
+        VLOG_FATAL("%s: pidfile check failed (%s), aborting",
+                   pidfile, strerror(-pid));
+    }
 }
diff --git a/python/ovs/daemon.py b/python/ovs/daemon.py
index cfb4178..3d46deb 100644
--- a/python/ovs/daemon.py
+++ b/python/ovs/daemon.py
@@ -111,75 +111,80 @@ def set_monitor():
     global _monitor
     _monitor = True
 
-def _die_if_already_running():
-    """If a locked pidfile exists, issue a warning message and, unless
-    ignore_existing_pidfile() has been called, terminate the program."""
-    if _pidfile is None:
-        return
-    pid = read_pidfile_if_exists(_pidfile)
-    if pid > 0:
-        if not _overwrite_pidfile:
-            msg = "%s: already running as pid %d" % (_pidfile, pid)
-            logging.error("%s, aborting" % msg)
-            sys.stderr.write("%s\n" % msg)
-            sys.exit(1)
-        else:
-            logging.warn("%s: %s already running"
-                         % (get_pidfile(), ovs.util.PROGRAM_NAME))
+def _fatal(msg):
+    logging.error(msg)
+    sys.stderr.write("%s\n" % msg)
+    sys.exit(1)
 
 def _make_pidfile():
     """If a pidfile has been configured, creates it and stores the running
     process's pid in it.  Ensures that the pidfile will be deleted when the
     process exits."""
-    if _pidfile is not None:
-        # Create pidfile via temporary file, so that observers never see an
-        # empty pidfile or an unlocked pidfile.
-        pid = os.getpid()
-        tmpfile = "%s.tmp%d" % (_pidfile, pid)
-        ovs.fatal_signal.add_file_to_unlink(tmpfile)
+    pid = os.getpid()
 
-        try:
-            # This is global to keep Python from garbage-collecting and
-            # therefore closing our file after this function exits.  That would
-            # unlock the lock for us, and we don't want that.
-            global file
+    # Create a temporary pidfile.
+    tmpfile = "%s.tmp%d" % (_pidfile, pid)
+    ovs.fatal_signal.add_file_to_unlink(tmpfile)
+    try:
+        # This is global to keep Python from garbage-collecting and
+        # therefore closing our file after this function exits.  That would
+        # unlock the lock for us, and we don't want that.
+        global file
 
-            file = open(tmpfile, "w")
-        except IOError, e:
-            logging.error("%s: create failed: %s"
-                          % (tmpfile, os.strerror(e.errno)))
-            return
+        file = open(tmpfile, "w")
+    except IOError, e:
+        _fatal("%s: create failed (%s)" % (tmpfile, e.strerror))
 
-        try:
-            fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
-        except IOError, e:
-            logging.error("%s: fcntl failed: %s"
-                          % (tmpfile, os.strerror(e.errno)))
-            file.close()
-            return
+    try:
+        s = os.fstat(file.fileno())
+    except IOError, e:
+        _fatal("%s: fstat failed (%s)" % (tmpfile, e.strerror))
 
-        try:
-            file.write("%s\n" % pid)
-            file.flush()
-            ovs.fatal_signal.add_file_to_unlink(_pidfile)
-        except OSError, e:
-            logging.error("%s: write failed: %s"
-                          % (tmpfile, os.strerror(e.errno)))
-            file.close()
-            return
-            
+    try:
+        file.write("%s\n" % pid)
+        file.flush()
+    except OSError, e:
+        _fatal("%s: write failed: %s" % (tmpfile, e.strerror))
+
+    try:
+        fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
+    except IOError, e:
+        _fatal("%s: fcntl failed: %s" % (tmpfile, e.strerror))
+
+    # Rename or link it to the correct name.
+    if _overwrite_pidfile:
         try:
             os.rename(tmpfile, _pidfile)
         except OSError, e:
-            ovs.fatal_signal.remove_file_to_unlink(_pidfile)
-            logging.error("failed to rename \"%s\" to \"%s\": %s"
-                          % (tmpfile, _pidfile, os.strerror(e.errno)))
-            file.close()
-            return
+            _fatal("failed to rename \"%s\" to \"%s\" (%s)"
+                   % (tmpfile, _pidfile, e.strerror))
+    else:
+        while True:
+            try:
+                os.link(tmpfile, _pidfile)
+                error = 0
+            except OSError, e:
+                error = e.errno
+            if error == errno.EEXIST:
+                _check_already_running()
+            elif error != errno.EINTR:
+                break
+        if error:
+            _fatal("failed to link \"%s\" as \"%s\" (%s)"
+                   % (tmpfile, _pidfile, os.strerror(error)))
 
-        s = os.fstat(file.fileno())
-        _pidfile_dev = s.st_dev
-        _pidfile_ino = s.st_ino
+
+    # Ensure that the pidfile will get deleted on exit.
+    ovs.fatal_signal.add_file_to_unlink(_pidfile)
+
+    # Delete the temporary pidfile if it still exists.
+    if not _overwrite_pidfile:
+        error = ovs.fatal_signal.unlink_file_now(tmpfile)
+        if error:
+            _fatal("%s: unlink failed (%s)" % (tmpfile, os.strerror(error)))
+
+    _pidfile_dev = s.st_dev
+    _pidfile_ino = s.st_ino
 
 def daemonize():
     """If configured with set_pidfile() or set_detach(), creates the pid file
@@ -343,8 +348,8 @@ def daemonize_start():
             _monitor_daemon(daemon_pid)
         # Running in daemon process
     
-    _die_if_already_running()
-    _make_pidfile()
+    if _pidfile:
+        _make_pidfile()
 
 def daemonize_complete():
     """If daemonization is configured, then this function notifies the parent
@@ -366,7 +371,7 @@ Daemon options:
    --overwrite-pidfile     with --pidfile, start even if already running
 """ % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME))
 
-def __read_pidfile(pidfile, must_exist):
+def __read_pidfile(pidfile, delete_if_stale):
     if _pidfile_dev is not None:
         try:
             s = os.stat(pidfile)
@@ -381,31 +386,54 @@ def __read_pidfile(pidfile, must_exist):
             pass
 
     try:
-        file = open(pidfile, "r")
+        file = open(pidfile, "r+")
     except IOError, e:
-        if e.errno == errno.ENOENT and not must_exist:
+        if e.errno == errno.ENOENT and delete_if_stale:
             return 0
-        logging.warning("%s: open: %s" % (pidfile, os.strerror(e.errno)))
+        logging.warning("%s: open: %s" % (pidfile, e.strerror))
         return -e.errno
 
     # Python fcntl doesn't directly support F_GETLK so we have to just try
-    # to lock it.  If we get a conflicting lock that's "success"; otherwise
-    # the file is not locked.
+    # to lock it.
     try:
         fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
-        # File isn't locked if we get here, so treat that as an error.
-        logging.warning("%s: pid file is not locked" % pidfile)
-        try:
-            # As a side effect, this drops the lock.
+
+        # pidfile exists but wasn't locked by anyone.  Now we have the lock.
+        if not delete_if_stale:
             file.close()
+            logging.warning("%s: pid file is stale" % pidfile)
+            return -errno.ESRCH
+
+        # Is the file we have locked still named 'pidfile'?
+        try:
+            raced = False
+            s = os.stat(pidfile)
+            s2 = os.fstat(file.fileno())
+            if s.st_ino != s2.st_ino or s.st_dev != s2.st_dev:
+                raced = True
         except IOError:
-            pass
-        return -errno.ESRCH
+            raced = True
+        if raced:
+            logging.warning("%s: lost race to delete pidfile" % pidfile)
+            return -errno.ALREADY
+
+        # We won the right to delete the stale pidfile.
+        try:
+            os.unlink(pidfile)
+        except IOError, e:
+            logging.warning("%s: failed to delete stale pidfile"
+                            % (pidfile, e.strerror))
+            return -e.errno
+
+        logging.debug("%s: deleted stale pidfile" % pidfile)
+        file.close()
+        return 0
     except IOError, e:
         if e.errno not in [errno.EACCES, errno.EAGAIN]:
-            logging.warn("%s: fcntl: %s" % (pidfile, os.strerror(e.errno)))
+            logging.warn("%s: fcntl: %s" % (pidfile, e.strerror))
             return -e.errno
 
+    # Someone else has the pidfile locked.
     try:
         try:
             return int(file.readline())
@@ -424,13 +452,16 @@ def __read_pidfile(pidfile, must_exist):
 def read_pidfile(pidfile):
     """Opens and reads a PID from 'pidfile'.  Returns the positive PID if
     successful, otherwise a negative errno value."""
-    return __read_pidfile(pidfile, True)
-
-def read_pidfile_if_exists(pidfile):
-    """Opens and reads a PID from 'pidfile'.  Returns 0 if 'pidfile' does not
-    exist, the positive PID if successful, otherwise a negative errno value."""
     return __read_pidfile(pidfile, False)
 
+def _check_already_running():
+    pid = __read_pidfile(_pidfile, True)
+    if pid > 0:
+        _fatal("%s: already running as pid %d, aborting" % (_pidfile, pid))
+    elif pid < 0:
+        _fatal("%s: pidfile check failed (%s), aborting"
+               % (_pidfile, os.strerror(pid)))
+
 # XXX Python's getopt does not support options with optional arguments, so we
 # have to separate --pidfile (with no argument) from --pidfile-name (with an
 # argument).  Need to write our own getopt I guess.
diff --git a/tests/test-daemon.py b/tests/test-daemon.py
index 586e0ec..350b8f7 100644
--- a/tests/test-daemon.py
+++ b/tests/test-daemon.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import getopt
+import logging
 import signal
 import sys
 import time
@@ -24,6 +25,7 @@ def handler(signum, frame):
     raise Exception("Signal handler called with %d" % signum)
 
 def main(argv):
+    logging.basicConfig(level=logging.DEBUG)
 
     signal.signal(signal.SIGHUP, handler)
 
-- 
1.7.4.4




More information about the dev mailing list