[ovs-dev] [long socket names 3/3] socket-util: Work around Unix domain socket path name limits on Linux.

Ben Pfaff blp at nicira.com
Tue Nov 9 23:59:27 UTC 2010


Many Open vSwitch tests fail on Debian's automatic build machines because
the builds occur in deeply nested directories with long names.  OVS tries
to bind and connect to Unix domain sockets using absolute path names, which
in combination with long directory names means that the socket's name
exceeds the limit for Unix domain socket names (108 bytes on Linux).

This commit works around the problem on Linux by indirecting through
/proc/self/fd/<dirfd>/<basename> when names exceed the maximum that can be
used directly.

Reported-by: Hector Oron <hector.oron at gmail.com>
Reported-by: Sebastian Andrzej Siewior <sebastian at breakpoint.cc>
Reported-by: Roger Leigh <rleigh at codelibre.net>
Debian bug #602891.
Debian bug #602911.
---
 AUTHORS                  |    3 +
 lib/socket-util.c        |  107 ++++++++++++++++++++++++++++++++++++++++++----
 tests/.gitignore         |    1 +
 tests/automake.mk        |    6 +++
 tests/library.at         |   27 ++++++++++++
 tests/test-unix-socket.c |   64 +++++++++++++++++++++++++++
 6 files changed, 199 insertions(+), 9 deletions(-)
 create mode 100644 tests/test-unix-socket.c

diff --git a/AUTHORS b/AUTHORS
index 1330269..036e41e 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -43,6 +43,7 @@ Cedric Hobbs            cedric at nicira.com
 Derek Cormier           derek.cormier at lab.ntt.co.jp
 DK Moon                 dkmoon at nicira.com
 Ghanem Bahri            bahri.ghanem at gmail.com
+Hector Oron             hector.oron at gmail.com
 Henrik Amren            henrik at nicira.com
 Jad Naous               jnaous at gmail.com
 Jan Medved              jmedved at juniper.net
@@ -56,8 +57,10 @@ Paulo Cravero           pcravero at as2594.net
 Peter Balland           peter at nicira.com
 Ram Jothikumar          rjothikumar at nicira.com
 Rob Hoes                rob.hoes at citrix.com
+Roger Leigh             rleigh at codelibre.net
 Sajjad Lateef           slateef at nicira.com
 Sean Brady              sbrady at gtfservices.com
+Sebastian Andrzej Siewior  sebastian at breakpoint.cc
 Srini Seetharaman       seethara at stanford.edu
 Stephen Hemminger       shemminger at vyatta.com
 Takayuki HAMA           t-hama at cb.jp.nec.com
diff --git a/lib/socket-util.c b/lib/socket-util.c
index 8d291ed..f74e43b 100644
--- a/lib/socket-util.c
+++ b/lib/socket-util.c
@@ -45,6 +45,10 @@ VLOG_DEFINE_THIS_MODULE(socket_util);
 #define LINUX 0
 #endif
 
+#ifndef O_DIRECTORY
+#define O_DIRECTORY 0
+#endif
+
 /* Sets 'fd' to non-blocking mode.  Returns 0 if successful, otherwise a
  * positive errno value. */
 int
@@ -212,7 +216,7 @@ drain_fd(int fd, size_t n_packets)
 /* Stores in '*un' a sockaddr_un that refers to file 'name'.  Stores in
  * '*un_len' the size of the sockaddr_un. */
 static void
-make_sockaddr_un(const char *name, struct sockaddr_un* un, socklen_t *un_len)
+make_sockaddr_un__(const char *name, struct sockaddr_un *un, socklen_t *un_len)
 {
     un->sun_family = AF_UNIX;
     strncpy(un->sun_path, name, sizeof un->sun_path);
@@ -221,6 +225,68 @@ make_sockaddr_un(const char *name, struct sockaddr_un* un, socklen_t *un_len)
                 + strlen (un->sun_path) + 1);
 }
 
+/* Stores in '*un' a sockaddr_un that refers to file 'name'.  Stores in
+ * '*un_len' the size of the sockaddr_un.
+ *
+ * Returns 0 on success, otherwise a positive errno value.  On success,
+ * '*dirfdp' is either -1 or a nonnegative file descriptor that the caller
+ * should close after using '*un' to bind or connect.  On failure, '*dirfdp' is
+ * -1. */
+static int
+make_sockaddr_un(const char *name, struct sockaddr_un *un, socklen_t *un_len,
+                 int *dirfdp)
+{
+    enum { MAX_UN_LEN = sizeof un->sun_path - 1 };
+
+    *dirfdp = -1;
+    if (strlen(name) > MAX_UN_LEN) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+
+        if (LINUX) {
+            /* 'name' is too long to fit in a sockaddr_un, but we have a
+             * workaround for that on Linux: shorten it by opening a file
+             * descriptor for the directory part of the name and indirecting
+             * through /proc/self/fd/<dirfd>/<basename>. */
+            char *dir, *base;
+            char *short_name;
+            int dirfd;
+
+            dir = dir_name(name);
+            base = base_name(name);
+
+            dirfd = open(dir, O_DIRECTORY | O_RDONLY);
+            if (dirfd < 0) {
+                return errno;
+            }
+
+            short_name = xasprintf("/proc/self/fd/%d/%s", dirfd, base);
+            free(dir);
+            free(base);
+
+            if (strlen(short_name) <= MAX_UN_LEN) {
+                make_sockaddr_un__(short_name, un, un_len);
+                free(short_name);
+                *dirfdp = dirfd;
+                return 0;
+            }
+            free(short_name);
+            close(dirfd);
+
+            VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum "
+                         "%d bytes (even shortened)", name, MAX_UN_LEN);
+        } else {
+            /* 'name' is too long and we have no workaround. */
+            VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum "
+                         "%d bytes", name, MAX_UN_LEN);
+        }
+
+        return ENAMETOOLONG;
+    } else {
+        make_sockaddr_un__(name, un, un_len);
+        return 0;
+    }
+}
+
 /* Creates a Unix domain socket in the given 'style' (either SOCK_DGRAM or
  * SOCK_STREAM) that is bound to '*bind_path' (if 'bind_path' is non-null) and
  * connected to '*connect_path' (if 'connect_path' is non-null).  If 'nonblock'
@@ -247,9 +313,11 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
     if (nonblock) {
         int flags = fcntl(fd, F_GETFL, 0);
         if (flags == -1) {
+            error = errno;
             goto error;
         }
         if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
+            error = errno;
             goto error;
         }
     }
@@ -257,13 +325,22 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
     if (bind_path) {
         struct sockaddr_un un;
         socklen_t un_len;
-        make_sockaddr_un(bind_path, &un, &un_len);
-        if (unlink(un.sun_path) && errno != ENOENT) {
-            VLOG_WARN("unlinking \"%s\": %s\n", un.sun_path, strerror(errno));
+        int dirfd;
+
+        if (unlink(bind_path) && errno != ENOENT) {
+            VLOG_WARN("unlinking \"%s\": %s\n", bind_path, strerror(errno));
         }
         fatal_signal_add_file_to_unlink(bind_path);
-        if (bind(fd, (struct sockaddr*) &un, un_len)
-            || fchmod(fd, S_IRWXU)) {
+
+        error = make_sockaddr_un(bind_path, &un, &un_len, &dirfd);
+        if (!error && (bind(fd, (struct sockaddr*) &un, un_len)
+                       || fchmod(fd, S_IRWXU))) {
+            error = errno;
+        }
+        if (dirfd >= 0) {
+            close(dirfd);
+        }
+        if (error) {
             goto error;
         }
     }
@@ -271,9 +348,18 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
     if (connect_path) {
         struct sockaddr_un un;
         socklen_t un_len;
-        make_sockaddr_un(connect_path, &un, &un_len);
-        if (connect(fd, (struct sockaddr*) &un, un_len)
+        int dirfd;
+
+        error = make_sockaddr_un(connect_path, &un, &un_len, &dirfd);
+        if (!error
+            && connect(fd, (struct sockaddr*) &un, un_len)
             && errno != EINPROGRESS) {
+            error = errno;
+        }
+        if (dirfd >= 0) {
+            close(dirfd);
+        }
+        if (error) {
             goto error;
         }
     }
@@ -282,6 +368,7 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
     if (passcred) {
         int enable = 1;
         if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable))) {
+            error = errno;
             goto error;
         }
     }
@@ -290,7 +377,9 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
     return fd;
 
 error:
-    error = errno == EAGAIN ? EPROTO : errno;
+    if (error == EAGAIN) {
+        error = EPROTO;
+    }
     if (bind_path) {
         fatal_signal_remove_file_to_unlink(bind_path);
     }
diff --git a/tests/.gitignore b/tests/.gitignore
index 91fa176..04bede3 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -22,6 +22,7 @@
 /test-timeval
 /test-sha1
 /test-type-props
+/test-unix-socket
 /test-uuid
 /test-vconn
 /testsuite
diff --git a/tests/automake.mk b/tests/automake.mk
index 9e1bd9c..0bac351 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -78,6 +78,7 @@ lcov_wrappers = \
 	tests/lcov/test-sha1 \
 	tests/lcov/test-timeval \
 	tests/lcov/test-type-props \
+	tests/lcov/test-unix-socket \
 	tests/lcov/test-uuid \
 	tests/lcov/test-vconn
 
@@ -127,6 +128,7 @@ valgrind_wrappers = \
 	tests/valgrind/test-sha1 \
 	tests/valgrind/test-timeval \
 	tests/valgrind/test-type-props \
+	tests/valgrind/test-unix-socket \
 	tests/valgrind/test-uuid \
 	tests/valgrind/test-vconn
 
@@ -214,6 +216,10 @@ noinst_PROGRAMS += tests/test-lockfile
 tests_test_lockfile_SOURCES = tests/test-lockfile.c
 tests_test_lockfile_LDADD = lib/libopenvswitch.a
 
+noinst_PROGRAMS += tests/test-unix-socket
+tests_test_unix_socket_SOURCES = tests/test-unix-socket.c
+tests_test_unix_socket_LDADD = lib/libopenvswitch.a
+
 noinst_PROGRAMS += tests/test-ovsdb
 tests_test_ovsdb_SOURCES = \
 	tests/test-ovsdb.c \
diff --git a/tests/library.at b/tests/library.at
index 1dca2b8..cab8e2c 100644
--- a/tests/library.at
+++ b/tests/library.at
@@ -38,3 +38,30 @@ AT_SETUP([test byte order conversion])
 AT_KEYWORDS([byte order])
 AT_CHECK([test-byte-order], [0], [ignore])
 AT_CLEANUP
+
+AT_SETUP([test unix socket -- short pathname])
+AT_CHECK([test-unix-socket x])
+AT_CLEANUP
+
+dnl Unix sockets with long names are problematic because the name has to
+dnl go in a fixed-length field in struct sockaddr_un.  Generally the limit
+dnl is about 100 bytes.  On Linux, we work around this by indirecting through
+dnl a directory fd using /proc/self/fd/<dirfd>.  We do not have a workaround
+dnl for other platforms, so we skip the test there.
+AT_SETUP([test unix socket -- long pathname])
+AT_CHECK([dnl
+    case `uname` in dnl (
+      *[[lL]]inux*)
+        exit 0
+        ;; dnl (
+      *)
+        dnl Magic exit code to tell Autotest to skip this test.
+        exit 77
+        ;;
+    esac
+])
+dnl Linux has a 108 byte limit; this is 150 bytes long.
+mkdir 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+cd 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+AT_CHECK([test-unix-socket ../012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/socket socket])
+AT_CLEANUP
diff --git a/tests/test-unix-socket.c b/tests/test-unix-socket.c
new file mode 100644
index 0000000..ec90048
--- /dev/null
+++ b/tests/test-unix-socket.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "util.h"
+#include "socket-util.h"
+
+int
+main(int argc, char *argv[])
+{
+    const char *sockname1;
+    const char *sockname2;
+    int sock1, sock2;
+
+    set_program_name(argv[0]);
+
+    if (argc != 2 && argc != 3) {
+        ovs_fatal(0, "usage: %s SOCKETNAME1 [SOCKETNAME2]", argv[0]);
+    }
+    sockname1 = argv[1];
+    sockname2 = argc > 2 ? argv[2] : sockname1;
+
+    signal(SIGALRM, SIG_DFL);
+    alarm(5);
+
+    /* Create a listening socket under name 'sockname1'. */
+    sock1 = make_unix_socket(SOCK_STREAM, false, false, sockname1, NULL);
+    if (sock1 < 0) {
+        ovs_fatal(-sock1, "%s: bind failed", sockname1);
+    }
+    if (listen(sock1, 1)) {
+        ovs_fatal(errno, "%s: listen failed", sockname1);
+    }
+
+    /* Connect to 'sockname2' (which should be the same file, perhaps under a
+     * different name). */
+    sock2 = make_unix_socket(SOCK_STREAM, false, false, NULL, sockname2);
+    if (sock2 < 0) {
+        ovs_fatal(-sock2, "%s: connect failed", sockname2);
+    }
+
+    close(sock1);
+    close(sock2);
+
+    return 0;
+}
-- 
1.7.1





More information about the dev mailing list