[ovs-dev] [long socket names 3/3] socket-util: Work around Unix domain socket path name limits on Linux.
Ethan Jackson
ethan at nicira.com
Wed Nov 10 21:59:59 UTC 2010
Looks good.
On Tue, Nov 9, 2010 at 3:59 PM, Ben Pfaff <blp at nicira.com> wrote:
> Many Open vSwitch tests fail on Debian's automatic build machines because
> the builds occur in deeply nested directories with long names. OVS tries
> to bind and connect to Unix domain sockets using absolute path names, which
> in combination with long directory names means that the socket's name
> exceeds the limit for Unix domain socket names (108 bytes on Linux).
>
> This commit works around the problem on Linux by indirecting through
> /proc/self/fd/<dirfd>/<basename> when names exceed the maximum that can be
> used directly.
>
> Reported-by: Hector Oron <hector.oron at gmail.com>
> Reported-by: Sebastian Andrzej Siewior <sebastian at breakpoint.cc>
> Reported-by: Roger Leigh <rleigh at codelibre.net>
> Debian bug #602891.
> Debian bug #602911.
> ---
> AUTHORS | 3 +
> lib/socket-util.c | 107 ++++++++++++++++++++++++++++++++++++++++++----
> tests/.gitignore | 1 +
> tests/automake.mk | 6 +++
> tests/library.at | 27 ++++++++++++
> tests/test-unix-socket.c | 64 +++++++++++++++++++++++++++
> 6 files changed, 199 insertions(+), 9 deletions(-)
> create mode 100644 tests/test-unix-socket.c
>
> diff --git a/AUTHORS b/AUTHORS
> index 1330269..036e41e 100644
> --- a/AUTHORS
> +++ b/AUTHORS
> @@ -43,6 +43,7 @@ Cedric Hobbs cedric at nicira.com
> Derek Cormier derek.cormier at lab.ntt.co.jp
> DK Moon dkmoon at nicira.com
> Ghanem Bahri bahri.ghanem at gmail.com
> +Hector Oron hector.oron at gmail.com
> Henrik Amren henrik at nicira.com
> Jad Naous jnaous at gmail.com
> Jan Medved jmedved at juniper.net
> @@ -56,8 +57,10 @@ Paulo Cravero pcravero at as2594.net
> Peter Balland peter at nicira.com
> Ram Jothikumar rjothikumar at nicira.com
> Rob Hoes rob.hoes at citrix.com
> +Roger Leigh rleigh at codelibre.net
> Sajjad Lateef slateef at nicira.com
> Sean Brady sbrady at gtfservices.com
> +Sebastian Andrzej Siewior sebastian at breakpoint.cc
> Srini Seetharaman seethara at stanford.edu
> Stephen Hemminger shemminger at vyatta.com
> Takayuki HAMA t-hama at cb.jp.nec.com
> diff --git a/lib/socket-util.c b/lib/socket-util.c
> index 8d291ed..f74e43b 100644
> --- a/lib/socket-util.c
> +++ b/lib/socket-util.c
> @@ -45,6 +45,10 @@ VLOG_DEFINE_THIS_MODULE(socket_util);
> #define LINUX 0
> #endif
>
> +#ifndef O_DIRECTORY
> +#define O_DIRECTORY 0
> +#endif
> +
> /* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a
> * positive errno value. */
> int
> @@ -212,7 +216,7 @@ drain_fd(int fd, size_t n_packets)
> /* Stores in '*un' a sockaddr_un that refers to file 'name'. Stores in
> * '*un_len' the size of the sockaddr_un. */
> static void
> -make_sockaddr_un(const char *name, struct sockaddr_un* un, socklen_t *un_len)
> +make_sockaddr_un__(const char *name, struct sockaddr_un *un, socklen_t *un_len)
> {
> un->sun_family = AF_UNIX;
> strncpy(un->sun_path, name, sizeof un->sun_path);
> @@ -221,6 +225,68 @@ make_sockaddr_un(const char *name, struct sockaddr_un* un, socklen_t *un_len)
> + strlen (un->sun_path) + 1);
> }
>
> +/* Stores in '*un' a sockaddr_un that refers to file 'name'. Stores in
> + * '*un_len' the size of the sockaddr_un.
> + *
> + * Returns 0 on success, otherwise a positive errno value. On success,
> + * '*dirfdp' is either -1 or a nonnegative file descriptor that the caller
> + * should close after using '*un' to bind or connect. On failure, '*dirfdp' is
> + * -1. */
> +static int
> +make_sockaddr_un(const char *name, struct sockaddr_un *un, socklen_t *un_len,
> + int *dirfdp)
> +{
> + enum { MAX_UN_LEN = sizeof un->sun_path - 1 };
> +
> + *dirfdp = -1;
> + if (strlen(name) > MAX_UN_LEN) {
> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +
> + if (LINUX) {
> + /* 'name' is too long to fit in a sockaddr_un, but we have a
> + * workaround for that on Linux: shorten it by opening a file
> + * descriptor for the directory part of the name and indirecting
> + * through /proc/self/fd/<dirfd>/<basename>. */
> + char *dir, *base;
> + char *short_name;
> + int dirfd;
> +
> + dir = dir_name(name);
> + base = base_name(name);
> +
> + dirfd = open(dir, O_DIRECTORY | O_RDONLY);
> + if (dirfd < 0) {
> + return errno;
> + }
> +
> + short_name = xasprintf("/proc/self/fd/%d/%s", dirfd, base);
> + free(dir);
> + free(base);
> +
> + if (strlen(short_name) <= MAX_UN_LEN) {
> + make_sockaddr_un__(short_name, un, un_len);
> + free(short_name);
> + *dirfdp = dirfd;
> + return 0;
> + }
> + free(short_name);
> + close(dirfd);
> +
> + VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum "
> + "%d bytes (even shortened)", name, MAX_UN_LEN);
> + } else {
> + /* 'name' is too long and we have no workaround. */
> + VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum "
> + "%d bytes", name, MAX_UN_LEN);
> + }
> +
> + return ENAMETOOLONG;
> + } else {
> + make_sockaddr_un__(name, un, un_len);
> + return 0;
> + }
> +}
> +
> /* Creates a Unix domain socket in the given 'style' (either SOCK_DGRAM or
> * SOCK_STREAM) that is bound to '*bind_path' (if 'bind_path' is non-null) and
> * connected to '*connect_path' (if 'connect_path' is non-null). If 'nonblock'
> @@ -247,9 +313,11 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
> if (nonblock) {
> int flags = fcntl(fd, F_GETFL, 0);
> if (flags == -1) {
> + error = errno;
> goto error;
> }
> if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
> + error = errno;
> goto error;
> }
> }
> @@ -257,13 +325,22 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
> if (bind_path) {
> struct sockaddr_un un;
> socklen_t un_len;
> - make_sockaddr_un(bind_path, &un, &un_len);
> - if (unlink(un.sun_path) && errno != ENOENT) {
> - VLOG_WARN("unlinking \"%s\": %s\n", un.sun_path, strerror(errno));
> + int dirfd;
> +
> + if (unlink(bind_path) && errno != ENOENT) {
> + VLOG_WARN("unlinking \"%s\": %s\n", bind_path, strerror(errno));
> }
> fatal_signal_add_file_to_unlink(bind_path);
> - if (bind(fd, (struct sockaddr*) &un, un_len)
> - || fchmod(fd, S_IRWXU)) {
> +
> + error = make_sockaddr_un(bind_path, &un, &un_len, &dirfd);
> + if (!error && (bind(fd, (struct sockaddr*) &un, un_len)
> + || fchmod(fd, S_IRWXU))) {
> + error = errno;
> + }
> + if (dirfd >= 0) {
> + close(dirfd);
> + }
> + if (error) {
> goto error;
> }
> }
> @@ -271,9 +348,18 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
> if (connect_path) {
> struct sockaddr_un un;
> socklen_t un_len;
> - make_sockaddr_un(connect_path, &un, &un_len);
> - if (connect(fd, (struct sockaddr*) &un, un_len)
> + int dirfd;
> +
> + error = make_sockaddr_un(connect_path, &un, &un_len, &dirfd);
> + if (!error
> + && connect(fd, (struct sockaddr*) &un, un_len)
> && errno != EINPROGRESS) {
> + error = errno;
> + }
> + if (dirfd >= 0) {
> + close(dirfd);
> + }
> + if (error) {
> goto error;
> }
> }
> @@ -282,6 +368,7 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
> if (passcred) {
> int enable = 1;
> if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable))) {
> + error = errno;
> goto error;
> }
> }
> @@ -290,7 +377,9 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
> return fd;
>
> error:
> - error = errno == EAGAIN ? EPROTO : errno;
> + if (error == EAGAIN) {
> + error = EPROTO;
> + }
> if (bind_path) {
> fatal_signal_remove_file_to_unlink(bind_path);
> }
> diff --git a/tests/.gitignore b/tests/.gitignore
> index 91fa176..04bede3 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -22,6 +22,7 @@
> /test-timeval
> /test-sha1
> /test-type-props
> +/test-unix-socket
> /test-uuid
> /test-vconn
> /testsuite
> diff --git a/tests/automake.mk b/tests/automake.mk
> index 9e1bd9c..0bac351 100644
> --- a/tests/automake.mk
> +++ b/tests/automake.mk
> @@ -78,6 +78,7 @@ lcov_wrappers = \
> tests/lcov/test-sha1 \
> tests/lcov/test-timeval \
> tests/lcov/test-type-props \
> + tests/lcov/test-unix-socket \
> tests/lcov/test-uuid \
> tests/lcov/test-vconn
>
> @@ -127,6 +128,7 @@ valgrind_wrappers = \
> tests/valgrind/test-sha1 \
> tests/valgrind/test-timeval \
> tests/valgrind/test-type-props \
> + tests/valgrind/test-unix-socket \
> tests/valgrind/test-uuid \
> tests/valgrind/test-vconn
>
> @@ -214,6 +216,10 @@ noinst_PROGRAMS += tests/test-lockfile
> tests_test_lockfile_SOURCES = tests/test-lockfile.c
> tests_test_lockfile_LDADD = lib/libopenvswitch.a
>
> +noinst_PROGRAMS += tests/test-unix-socket
> +tests_test_unix_socket_SOURCES = tests/test-unix-socket.c
> +tests_test_unix_socket_LDADD = lib/libopenvswitch.a
> +
> noinst_PROGRAMS += tests/test-ovsdb
> tests_test_ovsdb_SOURCES = \
> tests/test-ovsdb.c \
> diff --git a/tests/library.at b/tests/library.at
> index 1dca2b8..cab8e2c 100644
> --- a/tests/library.at
> +++ b/tests/library.at
> @@ -38,3 +38,30 @@ AT_SETUP([test byte order conversion])
> AT_KEYWORDS([byte order])
> AT_CHECK([test-byte-order], [0], [ignore])
> AT_CLEANUP
> +
> +AT_SETUP([test unix socket -- short pathname])
> +AT_CHECK([test-unix-socket x])
> +AT_CLEANUP
> +
> +dnl Unix sockets with long names are problematic because the name has to
> +dnl go in a fixed-length field in struct sockaddr_un. Generally the limit
> +dnl is about 100 bytes. On Linux, we work around this by indirecting through
> +dnl a directory fd using /proc/self/fd/<dirfd>. We do not have a workaround
> +dnl for other platforms, so we skip the test there.
> +AT_SETUP([test unix socket -- long pathname])
> +AT_CHECK([dnl
> + case `uname` in dnl (
> + *[[lL]]inux*)
> + exit 0
> + ;; dnl (
> + *)
> + dnl Magic exit code to tell Autotest to skip this test.
> + exit 77
> + ;;
> + esac
> +])
> +dnl Linux has a 108 byte limit; this is 150 bytes long.
> +mkdir 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
> +cd 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
> +AT_CHECK([test-unix-socket ../012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/socket socket])
> +AT_CLEANUP
> diff --git a/tests/test-unix-socket.c b/tests/test-unix-socket.c
> new file mode 100644
> index 0000000..ec90048
> --- /dev/null
> +++ b/tests/test-unix-socket.c
> @@ -0,0 +1,64 @@
> +/*
> + * Copyright (c) 2010 Nicira Networks.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include <errno.h>
> +#include <signal.h>
> +#include <unistd.h>
> +
> +#include "util.h"
> +#include "socket-util.h"
> +
> +int
> +main(int argc, char *argv[])
> +{
> + const char *sockname1;
> + const char *sockname2;
> + int sock1, sock2;
> +
> + set_program_name(argv[0]);
> +
> + if (argc != 2 && argc != 3) {
> + ovs_fatal(0, "usage: %s SOCKETNAME1 [SOCKETNAME2]", argv[0]);
> + }
> + sockname1 = argv[1];
> + sockname2 = argc > 2 ? argv[2] : sockname1;
> +
> + signal(SIGALRM, SIG_DFL);
> + alarm(5);
> +
> + /* Create a listening socket under name 'sockname1'. */
> + sock1 = make_unix_socket(SOCK_STREAM, false, false, sockname1, NULL);
> + if (sock1 < 0) {
> + ovs_fatal(-sock1, "%s: bind failed", sockname1);
> + }
> + if (listen(sock1, 1)) {
> + ovs_fatal(errno, "%s: listen failed", sockname1);
> + }
> +
> + /* Connect to 'sockname2' (which should be the same file, perhaps under a
> + * different name). */
> + sock2 = make_unix_socket(SOCK_STREAM, false, false, NULL, sockname2);
> + if (sock2 < 0) {
> + ovs_fatal(-sock2, "%s: connect failed", sockname2);
> + }
> +
> + close(sock1);
> + close(sock2);
> +
> + return 0;
> +}
> --
> 1.7.1
>
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev_openvswitch.org
>
More information about the dev
mailing list