[ovs-dev] [PATCH v4 3/3] netdev-dpdk: vHost client mode and reconnect

Ciara Loftus ciara.loftus at intel.com
Thu Aug 11 16:28:31 UTC 2016


Until now, vHost ports in OVS have only been able to operate in 'server'
mode whereby OVS creates and manages the vHost socket and essentially
acts as the vHost 'server'. With this commit a new mode, 'client' mode,
is available. In this mode, OVS acts as the vHost 'client' and connects
to the socket created and managed by QEMU which now acts as the vHost
'server'. This mode provides reconnect capability, which lets a vHost
port resume normal connectivity in the event of a switch reset.

By default, dpdkvhostuser ports still operate in 'server' mode, unless a
valid 'vhost-server-path' is specified for that device like so:

ovs-vsctl set Interface <vhostportname>
options:vhost-server-path=<path_to_socket_dir>

Once specified, the port stays in 'client' mode for the remainder of its
lifetime.

QEMU v2.7.0+ is required when using OVS in vHost client mode and QEMU in
vHost server mode.

Signed-off-by: Ciara Loftus <ciara.loftus at intel.com>
---
v4:
- Rebase
- Remove vhost-driver-mode and allow per-interface flag.
- Use 'vhost-server-path' option to enable client mode for the given
  port and also to set the path for the client port.

v3:
- Only restrict vhost_sock_dir if server mode

v2
- Updated comments in vhost construct & destruct
- Add check for server-mode before printing error when destruct is called
  on a running VM
- Fixed coding style/standards issues
- Use strcmp instead of strncmp when processing 'vhost-driver-mode'
---
 INSTALL.DPDK-ADVANCED.md |  34 +++++++++++++
 NEWS                     |   1 +
 lib/netdev-dpdk.c        | 130 +++++++++++++++++++++++++++++++++++++++--------
 vswitchd/vswitch.xml     |  10 ++++
 4 files changed, 154 insertions(+), 21 deletions(-)

diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md
index 8d6cabc..6f03533 100755
--- a/INSTALL.DPDK-ADVANCED.md
+++ b/INSTALL.DPDK-ADVANCED.md
@@ -568,6 +568,40 @@ For users wanting to do packet forwarding using kernel stack below are the steps
        where `-L`: Changes the numbers of channels of the specified network device
        and `combined`: Changes the number of multi-purpose channels.
 
+    4. OVS vHost client-mode & vHost reconnect (OPTIONAL)
+
+       By default, OVS DPDK acts as the vHost socket server for dpdkvhostuser
+       ports and QEMU acts as the vHost client. This means OVS creates and
+       manages the vHost socket and QEMU is the client which connects to the
+       vHost server (OVS).  In QEMU v2.7 the option is available for QEMU to
+       act as the vHost server meaning the roles can be reversed and OVS can
+       become the vHost client. To enable client mode for a given dpdkvhostuser
+       port, one must specify a valid 'vhost-server-path' like so:
+
+       ```
+       ovs-vsctl set Interface <vhostportname> options:vhost-server-path=<dir>
+       ```
+
+       Setting this value automatically switches the port to client mode (from
+       OVS' perspective).
+       'vhost-server-path' reflects the location the vHost socket
+       <vhostportname> resides in, or will reside in once QEMU is launched.
+       The port remains in 'client' mode for the remainder of its lifetime,
+       i.e. it cannot be reverted back to server mode.
+
+       One must append ',server' to the 'chardev' arguments on the QEMU command
+       line, to instruct QEMU to use vHost server mode for a given interface,
+       like so:
+
+       ```
+       -chardev socket,id=char0,path=<dir>/<vhostportname>,server
+       ```
+
+       One benefit of using this mode is the ability for vHost ports to
+       'reconnect' in the event of the switch crashing or being brought down. Once
+       it is brought back up, the vHost ports will reconnect automatically and
+       normal service will resume.
+
   - VM Configuration with libvirt
 
     * change the user/group, access control policty and restart libvirtd.
diff --git a/NEWS b/NEWS
index 9f09e1c..99412ba 100644
--- a/NEWS
+++ b/NEWS
@@ -70,6 +70,7 @@ Post-v2.5.0
        fragmentation or NAT support yet)
      * Support for DPDK 16.07
      * Remove dpdkvhostcuse port type.
+     * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7)
    - Increase number of registers to 16.
    - ovs-benchmark: This utility has been removed due to lack of use and
      bitrot.
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4e4c74e..e480ce8 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -352,10 +352,11 @@ struct netdev_dpdk {
     /* True if vHost device is 'up' and has been reconfigured at least once */
     bool vhost_reconfigured;
 
-    /* Identifier used to distinguish vhost devices from each other.  It does
-     * not change during the lifetime of a struct netdev_dpdk.  It can be read
+    /* Identifiers used to distinguish vhost devices from each other. They do
+     * not change during the lifetime of a struct netdev_dpdk. They can be read
      * without holding any mutex. */
-    const char vhost_id[PATH_MAX];
+    const char vhost_server_id[PATH_MAX];
+    const char vhost_client_id[PATH_MAX];
 
     /* In dpdk_list. */
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
@@ -373,6 +374,12 @@ struct netdev_dpdk {
     /* Socket ID detected when vHost device is brought up */
     int requested_socket_id;
 
+    /* Directory where vHost client socket resides */
+    char requested_vhost_client_path[PATH_MAX];
+
+    /* Denotes whether vHost port is client/server mode */
+    uint64_t vhost_driver_flags;
+
     /* Ingress Policer */
     OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
     uint32_t policer_rate;
@@ -760,6 +767,8 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
     ovsrcu_index_init(&dev->vid, -1);
     dev->vhost_reconfigured = false;
+    /* initialise vHost port in server mode */
+    dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT;
 
     buf_size = dpdk_buf_size(dev->mtu);
     dev->dpdk_mp = dpdk_mp_get(dev->socket_id, FRAME_LEN_TO_MTU(buf_size));
@@ -824,13 +833,21 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
     }
 }
 
+/* Returns the vHost socket id in use: 'vhost_client_id' if the port is in
+ * client mode (RTE_VHOST_USER_CLIENT set), otherwise 'vhost_server_id'. */
+static const char *
+get_vhost_id(struct netdev_dpdk *dev)
+{
+    return dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT ?
+           dev->vhost_client_id : dev->vhost_server_id;
+}
+
 static int
 netdev_dpdk_vhost_construct(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     const char *name = netdev->name;
     int err;
-    uint64_t flags = 0;
 
     /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
      * the file system. '/' or '\' would traverse directories, so they're not
@@ -850,17 +867,21 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
     /* Take the name of the vhost-user port and append it to the location where
      * the socket is to be created, then register the socket.
      */
-    snprintf(CONST_CAST(char *, dev->vhost_id), sizeof dev->vhost_id, "%s/%s",
-             vhost_sock_dir, name);
+    snprintf(CONST_CAST(char *, dev->vhost_server_id),
+             sizeof(dev->vhost_server_id), "%s/%s", vhost_sock_dir, name);
 
-    err = rte_vhost_driver_register(dev->vhost_id, flags);
+    err = rte_vhost_driver_register(get_vhost_id(dev),
+                                    dev->vhost_driver_flags);
     if (err) {
         VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
-                 dev->vhost_id);
+                 get_vhost_id(dev));
     } else {
-        fatal_signal_add_file_to_unlink(dev->vhost_id);
-        VLOG_INFO("Socket %s created for vhost-user port %s\n",
-                  dev->vhost_id, name);
+        if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+            /* OVS server mode - add this socket to list for deletion */
+            fatal_signal_add_file_to_unlink(get_vhost_id(dev));
+            VLOG_INFO("Socket %s created for vhost-user port %s\n",
+                      get_vhost_id(dev), name);
+        }
         err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
     }
 
@@ -918,7 +939,7 @@ dpdk_vhost_driver_unregister(struct netdev_dpdk *dev)
     OVS_EXCLUDED(dpdk_mutex)
     OVS_EXCLUDED(dev->mutex)
 {
-    return rte_vhost_driver_unregister(dev->vhost_id);
+    return rte_vhost_driver_unregister(get_vhost_id(dev));
 }
 
 static void
@@ -930,12 +951,13 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
     ovs_mutex_lock(&dev->mutex);
 
     /* Guest becomes an orphan if still attached. */
-    if (netdev_dpdk_get_vid(dev) >= 0) {
+    if (netdev_dpdk_get_vid(dev) >= 0
+        && !(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
         VLOG_ERR("Removing port '%s' while vhost device still attached.",
                  netdev->name);
         VLOG_ERR("To restore connectivity after re-adding of port, VM on socket"
                  " '%s' must be restarted.",
-                 dev->vhost_id);
+                 get_vhost_id(dev));
     }
 
     free(ovsrcu_get_protected(struct ingress_policer *,
@@ -949,9 +971,10 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
     ovs_mutex_unlock(&dpdk_mutex);
 
     if (dpdk_vhost_driver_unregister(dev)) {
-        VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
-    } else {
-        fatal_signal_remove_file_to_unlink(dev->vhost_id);
+        VLOG_ERR("Unable to remove vhost-user socket %s", get_vhost_id(dev));
+    } else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+        /* OVS server mode - remove this socket from list for deletion */
+        fatal_signal_remove_file_to_unlink(get_vhost_id(dev));
     }
 }
 
@@ -1013,6 +1036,30 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
 }
 
 static int
+netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap *args)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    const char *path;
+    struct stat s;
+
+    if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+        path = smap_get(args, "vhost-server-path");
+        /* Port is still in server mode. Request a reconfigure if 'path':
+         *  1. is non-NULL.
+         *  2. names something that exists, i.e. stat() succeeds.
+         *  3. differs from the path recorded by the previous request.
+         * NOTE(review): strcpy() below is not bounded to PATH_MAX; confirm. */
+        if (path && !stat(path, &s)
+                && strcmp(path, dev->requested_vhost_client_path)) {
+            strcpy(dev->requested_vhost_client_path, path);
+            netdev_request_reconfigure(netdev);
+        }
+    }
+
+    return 0;
+}
+
+static int
 netdev_dpdk_get_numa_id(const struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
@@ -2226,7 +2273,7 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
         }
     }
 
-    VLOG_DBG("TX queue mapping for %s\n", dev->vhost_id);
+    VLOG_DBG("TX queue mapping for %s\n", get_vhost_id(dev));
     for (i = 0; i < total_txqs; i++) {
         VLOG_DBG("%2d --> %2d", i, dev->tx_q[i].map);
     }
@@ -2250,7 +2297,7 @@ new_device(int vid)
     ovs_mutex_lock(&dpdk_mutex);
     /* Add device to the vhost port with the same name as that passed down. */
     LIST_FOR_EACH(dev, list_node, &dpdk_list) {
-        if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
+        if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) {
             uint32_t qp_num = rte_vhost_get_queue_num(vid);
 
             ovs_mutex_lock(&dev->mutex);
@@ -2376,7 +2423,7 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
 
     ovs_mutex_lock(&dpdk_mutex);
     LIST_FOR_EACH (dev, list_node, &dpdk_list) {
-        if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
+        if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) {
             ovs_mutex_lock(&dev->mutex);
             if (enable) {
                 dev->tx_q[qid].map = qid;
@@ -2895,6 +2942,47 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
         dev->vhost_reconfigured = true;
     }
 
+    /* Configure vHost client mode if requested and if the following criteria
+     * are met:
+     *  1. Device is currently in 'server' mode.
+     *  2. Device is currently not active.
+     *  3. A valid path has been specified.
+     */
+    if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)
+            && !(netdev_dpdk_get_vid(dev) >= 0)
+            && dev->requested_vhost_client_path
+            && strlen(dev->requested_vhost_client_path)) {
+        /* Unregister server-mode device */
+        ovs_mutex_unlock(&dpdk_mutex);
+        ovs_mutex_unlock(&dev->mutex);
+        err = dpdk_vhost_driver_unregister(dev);
+        ovs_mutex_lock(&dpdk_mutex);
+        ovs_mutex_lock(&dev->mutex);
+        if (err) {
+            VLOG_ERR("Unable to remove vhost-user socket %s",
+                     get_vhost_id(dev));
+        } else {
+            fatal_signal_remove_file_to_unlink(get_vhost_id(dev));
+            /* Create the new vhost_id using path specified */
+            snprintf(CONST_CAST(char *, dev->vhost_client_id),
+                     sizeof(dev->vhost_client_id),
+                     "%s/%s", dev->requested_vhost_client_path, dev->up.name);
+            /* Register client-mode device */
+            err = rte_vhost_driver_register(dev->vhost_client_id,
+                                            RTE_VHOST_USER_CLIENT);
+            if (err) {
+                VLOG_ERR("vhost-user device setup failure for device %s\n",
+                        dev->vhost_client_id);
+            } else {
+                /* Configuration successful */
+                dev->vhost_driver_flags |= RTE_VHOST_USER_CLIENT;
+                VLOG_INFO("vHost User device '%s' changed to 'client' mode, "
+                          "using client socket '%s'",
+                           dev->up.name, get_vhost_id(dev));
+            }
+        }
+    }
+
     ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 
@@ -3382,7 +3470,7 @@ static const struct netdev_class OVS_UNUSED dpdk_vhost_class =
         "dpdkvhostuser",
         netdev_dpdk_vhost_construct,
         netdev_dpdk_vhost_destruct,
-        NULL,
+        netdev_dpdk_vhost_set_config,
         NULL,
         netdev_dpdk_vhost_send,
         netdev_dpdk_vhost_get_carrier,
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 63f0d89..02adf8a 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -2366,6 +2366,16 @@
           </ul>
         </p>
       </column>
+
+      <column name="options" key="vhost-server-path"
+              type='{"type": "string"}'>
+        <p>
+          When specified, switches the given port permanently to 'client'
+          mode. The value specifies the directory in which to find the sockets
+          of vHost User client mode devices created by QEMU.
+          Only supported by DPDK vHost interfaces.
+        </p>
+      </column>
     </group>
 
     <group title="Interface Status">
-- 
2.4.3




More information about the dev mailing list