[ovs-dev] [long socket names 3/3] socket-util: Work around Unix domain socket path name limits on Linux.

Ethan Jackson ethan at nicira.com
Wed Nov 10 21:59:59 UTC 2010


Looks good.

On Tue, Nov 9, 2010 at 3:59 PM, Ben Pfaff <blp at nicira.com> wrote:
> Many Open vSwitch tests fail on Debian's automatic build machines because
> the builds occur in deeply nested directories with long names.  OVS tries
> to bind and connect to Unix domain sockets using absolute path names, which
> in combination with long directory names means that the socket's name
> exceeds the limit for Unix domain socket names (108 bytes on Linux).
>
> This commit works around the problem on Linux by indirecting through
> /proc/self/fd/<dirfd>/<basename> when names exceed the maximum that can be
> used directly.
>
> Reported-by: Hector Oron <hector.oron at gmail.com>
> Reported-by: Sebastian Andrzej Siewior <sebastian at breakpoint.cc>
> Reported-by: Roger Leigh <rleigh at codelibre.net>
> Debian bug #602891.
> Debian bug #602911.
> ---
>  AUTHORS                  |    3 +
>  lib/socket-util.c        |  107 ++++++++++++++++++++++++++++++++++++++++++----
>  tests/.gitignore         |    1 +
>  tests/automake.mk        |    6 +++
>  tests/library.at         |   27 ++++++++++++
>  tests/test-unix-socket.c |   64 +++++++++++++++++++++++++++
>  6 files changed, 199 insertions(+), 9 deletions(-)
>  create mode 100644 tests/test-unix-socket.c
>
> diff --git a/AUTHORS b/AUTHORS
> index 1330269..036e41e 100644
> --- a/AUTHORS
> +++ b/AUTHORS
> @@ -43,6 +43,7 @@ Cedric Hobbs            cedric at nicira.com
>  Derek Cormier           derek.cormier at lab.ntt.co.jp
>  DK Moon                 dkmoon at nicira.com
>  Ghanem Bahri            bahri.ghanem at gmail.com
> +Hector Oron             hector.oron at gmail.com
>  Henrik Amren            henrik at nicira.com
>  Jad Naous               jnaous at gmail.com
>  Jan Medved              jmedved at juniper.net
> @@ -56,8 +57,10 @@ Paulo Cravero           pcravero at as2594.net
>  Peter Balland           peter at nicira.com
>  Ram Jothikumar          rjothikumar at nicira.com
>  Rob Hoes                rob.hoes at citrix.com
> +Roger Leigh             rleigh at codelibre.net
>  Sajjad Lateef           slateef at nicira.com
>  Sean Brady              sbrady at gtfservices.com
> +Sebastian Andrzej Siewior  sebastian at breakpoint.cc
>  Srini Seetharaman       seethara at stanford.edu
>  Stephen Hemminger       shemminger at vyatta.com
>  Takayuki HAMA           t-hama at cb.jp.nec.com
> diff --git a/lib/socket-util.c b/lib/socket-util.c
> index 8d291ed..f74e43b 100644
> --- a/lib/socket-util.c
> +++ b/lib/socket-util.c
> @@ -45,6 +45,10 @@ VLOG_DEFINE_THIS_MODULE(socket_util);
>  #define LINUX 0
>  #endif
>
> +#ifndef O_DIRECTORY
> +#define O_DIRECTORY 0
> +#endif
> +
>  /* Sets 'fd' to non-blocking mode.  Returns 0 if successful, otherwise a
>  * positive errno value. */
>  int
> @@ -212,7 +216,7 @@ drain_fd(int fd, size_t n_packets)
>  /* Stores in '*un' a sockaddr_un that refers to file 'name'.  Stores in
>  * '*un_len' the size of the sockaddr_un. */
>  static void
> -make_sockaddr_un(const char *name, struct sockaddr_un* un, socklen_t *un_len)
> +make_sockaddr_un__(const char *name, struct sockaddr_un *un, socklen_t *un_len)
>  {
>     un->sun_family = AF_UNIX;
>     strncpy(un->sun_path, name, sizeof un->sun_path);
> @@ -221,6 +225,68 @@ make_sockaddr_un(const char *name, struct sockaddr_un* un, socklen_t *un_len)
>                 + strlen (un->sun_path) + 1);
>  }
>
> +/* Stores in '*un' a sockaddr_un that refers to file 'name'.  Stores in
> + * '*un_len' the size of the sockaddr_un.
> + *
> + * Returns 0 on success, otherwise a positive errno value.  On success,
> + * '*dirfdp' is either -1 or a nonnegative file descriptor that the caller
> + * should close after using '*un' to bind or connect.  On failure, '*dirfdp' is
> + * -1. */
> +static int
> +make_sockaddr_un(const char *name, struct sockaddr_un *un, socklen_t *un_len,
> +                 int *dirfdp)
> +{
> +    enum { MAX_UN_LEN = sizeof un->sun_path - 1 };
> +
> +    *dirfdp = -1;
> +    if (strlen(name) > MAX_UN_LEN) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +
> +        if (LINUX) {
> +            /* 'name' is too long to fit in a sockaddr_un, but we have a
> +             * workaround for that on Linux: shorten it by opening a file
> +             * descriptor for the directory part of the name and indirecting
> +             * through /proc/self/fd/<dirfd>/<basename>. */
> +            char *dir, *base;
> +            char *short_name;
> +            int dirfd;
> +
> +            dir = dir_name(name);
> +            base = base_name(name);
> +
> +            dirfd = open(dir, O_DIRECTORY | O_RDONLY);
> +            if (dirfd < 0) {
> +                return errno;
> +            }
> +
> +            short_name = xasprintf("/proc/self/fd/%d/%s", dirfd, base);
> +            free(dir);
> +            free(base);
> +
> +            if (strlen(short_name) <= MAX_UN_LEN) {
> +                make_sockaddr_un__(short_name, un, un_len);
> +                free(short_name);
> +                *dirfdp = dirfd;
> +                return 0;
> +            }
> +            free(short_name);
> +            close(dirfd);
> +
> +            VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum "
> +                         "%d bytes (even shortened)", name, MAX_UN_LEN);
> +        } else {
> +            /* 'name' is too long and we have no workaround. */
> +            VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum "
> +                         "%d bytes", name, MAX_UN_LEN);
> +        }
> +
> +        return ENAMETOOLONG;
> +    } else {
> +        make_sockaddr_un__(name, un, un_len);
> +        return 0;
> +    }
> +}
> +
>  /* Creates a Unix domain socket in the given 'style' (either SOCK_DGRAM or
>  * SOCK_STREAM) that is bound to '*bind_path' (if 'bind_path' is non-null) and
>  * connected to '*connect_path' (if 'connect_path' is non-null).  If 'nonblock'
> @@ -247,9 +313,11 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
>     if (nonblock) {
>         int flags = fcntl(fd, F_GETFL, 0);
>         if (flags == -1) {
> +            error = errno;
>             goto error;
>         }
>         if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
> +            error = errno;
>             goto error;
>         }
>     }
> @@ -257,13 +325,22 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
>     if (bind_path) {
>         struct sockaddr_un un;
>         socklen_t un_len;
> -        make_sockaddr_un(bind_path, &un, &un_len);
> -        if (unlink(un.sun_path) && errno != ENOENT) {
> -            VLOG_WARN("unlinking \"%s\": %s\n", un.sun_path, strerror(errno));
> +        int dirfd;
> +
> +        if (unlink(bind_path) && errno != ENOENT) {
> +            VLOG_WARN("unlinking \"%s\": %s\n", bind_path, strerror(errno));
>         }
>         fatal_signal_add_file_to_unlink(bind_path);
> -        if (bind(fd, (struct sockaddr*) &un, un_len)
> -            || fchmod(fd, S_IRWXU)) {
> +
> +        error = make_sockaddr_un(bind_path, &un, &un_len, &dirfd);
> +        if (!error && (bind(fd, (struct sockaddr*) &un, un_len)
> +                       || fchmod(fd, S_IRWXU))) {
> +            error = errno;
> +        }
> +        if (dirfd >= 0) {
> +            close(dirfd);
> +        }
> +        if (error) {
>             goto error;
>         }
>     }
> @@ -271,9 +348,18 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
>     if (connect_path) {
>         struct sockaddr_un un;
>         socklen_t un_len;
> -        make_sockaddr_un(connect_path, &un, &un_len);
> -        if (connect(fd, (struct sockaddr*) &un, un_len)
> +        int dirfd;
> +
> +        error = make_sockaddr_un(connect_path, &un, &un_len, &dirfd);
> +        if (!error
> +            && connect(fd, (struct sockaddr*) &un, un_len)
>             && errno != EINPROGRESS) {
> +            error = errno;
> +        }
> +        if (dirfd >= 0) {
> +            close(dirfd);
> +        }
> +        if (error) {
>             goto error;
>         }
>     }
> @@ -282,6 +368,7 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
>     if (passcred) {
>         int enable = 1;
>         if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable))) {
> +            error = errno;
>             goto error;
>         }
>     }
> @@ -290,7 +377,9 @@ make_unix_socket(int style, bool nonblock, bool passcred OVS_UNUSED,
>     return fd;
>
>  error:
> -    error = errno == EAGAIN ? EPROTO : errno;
> +    if (error == EAGAIN) {
> +        error = EPROTO;
> +    }
>     if (bind_path) {
>         fatal_signal_remove_file_to_unlink(bind_path);
>     }
> diff --git a/tests/.gitignore b/tests/.gitignore
> index 91fa176..04bede3 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -22,6 +22,7 @@
>  /test-timeval
>  /test-sha1
>  /test-type-props
> +/test-unix-socket
>  /test-uuid
>  /test-vconn
>  /testsuite
> diff --git a/tests/automake.mk b/tests/automake.mk
> index 9e1bd9c..0bac351 100644
> --- a/tests/automake.mk
> +++ b/tests/automake.mk
> @@ -78,6 +78,7 @@ lcov_wrappers = \
>        tests/lcov/test-sha1 \
>        tests/lcov/test-timeval \
>        tests/lcov/test-type-props \
> +       tests/lcov/test-unix-socket \
>        tests/lcov/test-uuid \
>        tests/lcov/test-vconn
>
> @@ -127,6 +128,7 @@ valgrind_wrappers = \
>        tests/valgrind/test-sha1 \
>        tests/valgrind/test-timeval \
>        tests/valgrind/test-type-props \
> +       tests/valgrind/test-unix-socket \
>        tests/valgrind/test-uuid \
>        tests/valgrind/test-vconn
>
> @@ -214,6 +216,10 @@ noinst_PROGRAMS += tests/test-lockfile
>  tests_test_lockfile_SOURCES = tests/test-lockfile.c
>  tests_test_lockfile_LDADD = lib/libopenvswitch.a
>
> +noinst_PROGRAMS += tests/test-unix-socket
> +tests_test_unix_socket_SOURCES = tests/test-unix-socket.c
> +tests_test_unix_socket_LDADD = lib/libopenvswitch.a
> +
>  noinst_PROGRAMS += tests/test-ovsdb
>  tests_test_ovsdb_SOURCES = \
>        tests/test-ovsdb.c \
> diff --git a/tests/library.at b/tests/library.at
> index 1dca2b8..cab8e2c 100644
> --- a/tests/library.at
> +++ b/tests/library.at
> @@ -38,3 +38,30 @@ AT_SETUP([test byte order conversion])
>  AT_KEYWORDS([byte order])
>  AT_CHECK([test-byte-order], [0], [ignore])
>  AT_CLEANUP
> +
> +AT_SETUP([test unix socket -- short pathname])
> +AT_CHECK([test-unix-socket x])
> +AT_CLEANUP
> +
> +dnl Unix sockets with long names are problematic because the name has to
> +dnl go in a fixed-length field in struct sockaddr_un.  Generally the limit
> +dnl is about 100 bytes.  On Linux, we work around this by indirecting through
> +dnl a directory fd using /proc/self/fd/<dirfd>.  We do not have a workaround
> +dnl for other platforms, so we skip the test there.
> +AT_SETUP([test unix socket -- long pathname])
> +AT_CHECK([dnl
> +    case `uname` in dnl (
> +      *[[lL]]inux*)
> +        exit 0
> +        ;; dnl (
> +      *)
> +        dnl Magic exit code to tell Autotest to skip this test.
> +        exit 77
> +        ;;
> +    esac
> +])
> +dnl Linux has a 108 byte limit; this is 150 bytes long.
> +mkdir 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
> +cd 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
> +AT_CHECK([test-unix-socket ../012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/socket socket])
> +AT_CLEANUP
> diff --git a/tests/test-unix-socket.c b/tests/test-unix-socket.c
> new file mode 100644
> index 0000000..ec90048
> --- /dev/null
> +++ b/tests/test-unix-socket.c
> @@ -0,0 +1,64 @@
> +/*
> + * Copyright (c) 2010 Nicira Networks.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include <errno.h>
> +#include <signal.h>
> +#include <unistd.h>
> +
> +#include "util.h"
> +#include "socket-util.h"
> +
> +int
> +main(int argc, char *argv[])
> +{
> +    const char *sockname1;
> +    const char *sockname2;
> +    int sock1, sock2;
> +
> +    set_program_name(argv[0]);
> +
> +    if (argc != 2 && argc != 3) {
> +        ovs_fatal(0, "usage: %s SOCKETNAME1 [SOCKETNAME2]", argv[0]);
> +    }
> +    sockname1 = argv[1];
> +    sockname2 = argc > 2 ? argv[2] : sockname1;
> +
> +    signal(SIGALRM, SIG_DFL);
> +    alarm(5);
> +
> +    /* Create a listening socket under name 'sockname1'. */
> +    sock1 = make_unix_socket(SOCK_STREAM, false, false, sockname1, NULL);
> +    if (sock1 < 0) {
> +        ovs_fatal(-sock1, "%s: bind failed", sockname1);
> +    }
> +    if (listen(sock1, 1)) {
> +        ovs_fatal(errno, "%s: listen failed", sockname1);
> +    }
> +
> +    /* Connect to 'sockname2' (which should be the same file, perhaps under a
> +     * different name). */
> +    sock2 = make_unix_socket(SOCK_STREAM, false, false, NULL, sockname2);
> +    if (sock2 < 0) {
> +        ovs_fatal(-sock2, "%s: connect failed", sockname2);
> +    }
> +
> +    close(sock1);
> +    close(sock2);
> +
> +    return 0;
> +}
> --
> 1.7.1
>
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev_openvswitch.org
>




More information about the dev mailing list