[ovs-dev] [v10 01/12] dpif-netdev: Add command line and function pointer for miniflow extract

Eelco Chaudron echaudro at redhat.com
Tue Jul 13 09:37:53 UTC 2021



On 13 Jul 2021, at 11:17, Amber, Kumar wrote:

> Hi Eelco,
>
> Pls find my replies and should I add your ack after fixing those if those are the last ones 😊
>
> <SNIP>
>
>>> +
>>> +miniflow_extract_func
>>> +dp_mfex_impl_get_default(void)
>>> +{
>>> +    return default_mfex_func;
>>
>> This is still not atomic, you need an atomic_read_relaxed() here.
>>
>
> Yes did it already for v11 when I realized 😊
>
>>> +}
>>> +
>>> +int
>>> +dp_mfex_impl_set_default_by_name(const char *name) {
>>> +    miniflow_extract_func new_default;
>>> +    atomic_uintptr_t *mfex_func = (void *)&default_mfex_func;
>>> +
>>> +    int err = dp_mfex_impl_get_by_name(name, &new_default);
>>> +
>>> +    if (!err) {
>>> +        atomic_store_relaxed(mfex_func, (uintptr_t) new_default);
>>> +    }
>>> +
>>> +    return err;
>>> +
>>> +}
>>> +
>>> +void
>>> +dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread
>> **pmd_list,
>>> +                 size_t pmd_list_size) {
>>> +    /* Add all MFEX functions to reply string. */
>>> +    ds_put_cstr(reply, "Available MFEX implementations:\n");
>>> +
>>> +    for (int i = 0; i < MFEX_IMPL_MAX; i++) {
>>> +        ds_put_format(reply, "  %s (available: %s pmds: ",
>>> +                      mfex_impls[i].name, mfex_impls[i].available ?
>>> +                      "available" : "not available");
>>
>> Maybe we should loose the “available:” part, as now it looks like this:
>>
>> $ ovs-appctl dpif-netdev/miniflow-parser-get Available MFEX implementations:
>>   autovalidator (available: available pmds: none)
>>   scalar (available: available pmds: 1,15)
>>   study (available: available pmds: none)
>>   avx512_vbmi_ipv4_udp (available: not available pmds: none)
>>   avx512_ipv4_udp (available: not available pmds: none)
>>   avx512_vbmi_ipv4_tcp (available: not available pmds: none)
>>   avx512_ipv4_tcp (available: not available pmds: none)
>>   avx512_vbmi_dot1q_ipv4_udp (available: not available pmds: none)
>>   avx512_dot1q_ipv4_udp (available: not available pmds: none)
>>   avx512_vbmi_dot1q_ipv4_tcp (available: not available pmds: none)
>>   avx512_dot1q_ipv4_tcp (available: not available pmds: none)
>>
>> Maybe this looks better without it, and adding a comma:
>>
>> $ ovs-appctl dpif-netdev/miniflow-parser-get Available MFEX implementations:
>>   autovalidator (available, pmds: none)
>>   scalar (available, pmds: 1,15)
>>   study (available, pmds: none)
>>   avx512_vbmi_ipv4_udp (not available, pmds: none)
>>   avx512_ipv4_udp (not available, pmds: none)
>>   avx512_vbmi_ipv4_tcp (not available, pmds: none)
>>   avx512_ipv4_tcp (not available, pmds: none)
>>   avx512_vbmi_dot1q_ipv4_udp (not available pmds: none)
>>   avx512_dot1q_ipv4_udp (not available, pmds: none)
>>   avx512_vbmi_dot1q_ipv4_tcp (not available, pmds: none)
>>   avx512_dot1q_ipv4_tcp (not available, pmds: none)
>>
>> What do you think?
>>
>
> True and false for this one looks good available ones look very clumsy ? thoughts

I think Flavio was fine with leaving it as true/false which is ok by me.

>>> +
>>> +        for (size_t j = 0; j < pmd_list_size; j++) {
>>> +            struct dp_netdev_pmd_thread *pmd = pmd_list[j];
>>> +            if (pmd->core_id == NON_PMD_CORE_ID) {
>>> +                continue;
>>> +            }
>>> +
>>> +            if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) {
>>> +                ds_put_format(reply, "%u,", pmd->core_id);
>>> +            }
>>> +        }
>>> +
>>> +        ds_chomp(reply, ',');
>>> +
>>> +        if (ds_last(reply) == ' ') {
>>> +            ds_put_cstr(reply, "none");
>>> +        }
>>> +
>>> +        ds_put_cstr(reply, ")\n");
>>> +    }
>>> +
>>> +}
>>> +
>>> +/* This function checks all available MFEX implementations, and
>>> +selects and
>>> + * returns the function pointer to the one requested by "name". If
>>> +nothing
>>> + * is found it reutrns error.
>>
>> You forgot to fix this?
>>
>
> I did but looks like typo again :p
>>> + */
>>> +int
>>> +dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func
>>> +*out_func) {
>>> +    if ((name == NULL) || (out_func == NULL)) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    for (int i = 0; i < MFEX_IMPL_MAX; i++) {
>>> +        if (strcmp(mfex_impls[i].name, name) == 0) {
>>> +            /* Check available is set before exec. */
>>> +            if (!mfex_impls[i].available) {
>>> +                *out_func = NULL;
>>> +                return -ENODEV;
>>> +            }
>>> +
>>> +            *out_func = mfex_impls[i].extract_func;
>>> +            return 0;
>>> +        }
>>> +    }
>>> +
>>> +    return -ENOENT;
>>> +}
>>> diff --git a/lib/dpif-netdev-private-extract.h
>>> b/lib/dpif-netdev-private-extract.h
>>> new file mode 100644
>>> index 000000000..614677dd0
>>> --- /dev/null
>>> +++ b/lib/dpif-netdev-private-extract.h
>>> @@ -0,0 +1,113 @@
>>> +/*
>>> + * Copyright (c) 2021 Intel.
>>> + *
>>> + * Licensed under the Apache License, Version 2.0 (the "License");
>>> + * you may not use this file except in compliance with the License.
>>> + * You may obtain a copy of the License at:
>>> + *
>>> + *     http://www.apache.org/licenses/LICENSE-2.0
>>> + *
>>> + * Unless required by applicable law or agreed to in writing,
>>> +software
>>> + * distributed under the License is distributed on an "AS IS" BASIS,
>>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>>> + * See the License for the specific language governing permissions
>>> +and
>>> + * limitations under the License.
>>> + */
>>> +
>>> +#ifndef MFEX_AVX512_EXTRACT
>>> +#define MFEX_AVX512_EXTRACT 1
>>> +
>>> +#include <sys/types.h>
>>> +
>>> +/* Forward declarations. */
>>> +struct dp_packet;
>>> +struct miniflow;
>>> +struct dp_netdev_pmd_thread;
>>> +struct dp_packet_batch;
>>> +struct netdev_flow_key;
>>> +
>>> +/* Function pointer prototype to be implemented in the optimized
>>> +miniflow
>>> + * extract code.
>>> + * returns the hitmask of the processed packets on success.
>>> + * returns zero on failure.
>>> + */
>>> +typedef uint32_t (*miniflow_extract_func)(struct dp_packet_batch *batch,
>>> +                                          struct netdev_flow_key *keys,
>>> +                                          uint32_t keys_size,
>>> +                                          odp_port_t in_port,
>>> +                                          struct dp_netdev_pmd_thread
>>> +                                          *pmd_handle);
>>> +
>>> +
>>> +/* The function pointer miniflow_extract_func depends on batch size.
>>> +*/ BUILD_ASSERT_DECL(NETDEV_MAX_BURST == 32);
>>> +
>>> +/* Probe function is used to detect if this CPU has the ISA required
>>> + * to run the optimized miniflow implementation.
>>> + * returns one on successful probe.
>>> + * returns negative errno on failure.
>>> + */
>>> +typedef int (*miniflow_extract_probe)(void);
>>> +
>>> +/* Structure representing the attributes of an optimized
>>> +implementation. */ struct dpif_miniflow_extract_impl {
>>> +    /* When it is true, this impl has passed the probe() checks. */
>>> +    bool available;
>>> +
>>> +    /* Probe function is used to detect if this CPU has the ISA required
>>> +     * to run the optimized miniflow implementation. It is optional and
>>> +     * if it is not used, then it must be null.
>>> +     */
>>> +    miniflow_extract_probe probe;
>>> +
>>> +    /* Optional function to call to extract miniflows for a burst of packets.
>>> +     * If it is not used must be set to NULL;
>>> +     */
>>> +    miniflow_extract_func extract_func;
>>> +
>>> +    /* Name of the optimized implementation. */
>>> +    char *name;
>>> +};
>>> +
>>> +
>>> +/* Enum to hold implementation indexes. The list is traversed
>>> + * linearly as from the ISA perspective, the VBMI version
>>> + * should always come before the generic AVX512-F version.
>>> + */
>>> +enum dpif_miniflow_extract_impl_idx {
>>> +    MFEX_IMPL_SCALAR,
>>> +    MFEX_IMPL_MAX
>>> +};
>>> +
>>> +extern struct ovs_mutex dp_netdev_mutex;
>>> +
>>> +/* This function returns all available implementations to the caller.
>>> +The
>>> + * quantity of implementations is returned by the int return value.
>>> + */
>>> +void
>>> +dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread
>> **pmd_list,
>>> +                 size_t pmd_list_size) OVS_REQUIRES(dp_netdev_mutex);
>>> +
>>> +/* This function checks all available MFEX implementations, and
>>> +selects the
>>> + * returns the function pointer to the one requested by "name".
>>> + */
>>> +int
>>> +dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func
>>> +*out_func);
>>> +
>>> +/* Returns the default MFEX which is first ./configure selected, but
>>> +can be
>>> + * overridden at runtime. */
>>> +miniflow_extract_func dp_mfex_impl_get_default(void);
>>> +
>>> +/* Overrides the default MFEX with the user set MFEX. */ int
>>> +dp_mfex_impl_set_default_by_name(const char *name);
>>> +
>>> +
>>> +/* Initializes the available miniflow extract implementations by
>>> +probing for
>>> + * the CPU ISA requirements. As the runtime available CPU ISA does
>>> +not change
>>> + * and the required ISA of the implementation also does not change,
>>> +it is safe
>>> + * to cache the probe() results, and not call probe() at runtime.
>>> + */
>>> +void
>>> +dpif_miniflow_extract_init(void);
>>> +
>>> +#endif /* MFEX_AVX512_EXTRACT */
>>> diff --git a/lib/dpif-netdev-private-thread.h
>>> b/lib/dpif-netdev-private-thread.h
>>> index ba79c4a0a..a4c092b69 100644
>>> --- a/lib/dpif-netdev-private-thread.h
>>> +++ b/lib/dpif-netdev-private-thread.h
>>> @@ -27,6 +27,11 @@
>>>  #include <stdint.h>
>>>
>>>  #include "cmap.h"
>>> +
>>> +#include "dpif-netdev-private-dfc.h"
>>> +#include "dpif-netdev-private-dpif.h"
>>> +#include "dpif-netdev-perf.h"
>>> +#include "dpif-netdev-private-extract.h"
>>>  #include "openvswitch/thread.h"
>>>
>>>  #ifdef  __cplusplus
>>> @@ -110,6 +115,9 @@ struct dp_netdev_pmd_thread {
>>>      /* Pointer for per-DPIF implementation scratch space. */
>>>      void *netdev_input_func_userdata;
>>>
>>> +    /* Function pointer to call for miniflow_extract() functionality. */
>>> +    ATOMIC(miniflow_extract_func) miniflow_extract_opt;
>>> +
>>>      struct seq *reload_seq;
>>>      uint64_t last_reload_seq;
>>>
>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
>>> 610949f36..f0ae5e4f4 100644
>>> --- a/lib/dpif-netdev.c
>>> +++ b/lib/dpif-netdev.c
>>> @@ -45,6 +45,7 @@
>>>  #include "dpif.h"
>>>  #include "dpif-netdev-lookup.h"
>>>  #include "dpif-netdev-perf.h"
>>> +#include "dpif-netdev-private-extract.h"
>>>  #include "dpif-provider.h"
>>>  #include "dummy.h"
>>>  #include "fat-rwlock.h"
>>> @@ -117,7 +118,7 @@
>> COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
>>>  COVERAGE_DEFINE(datapath_drop_hw_miss_recover);
>>>
>>>  /* Protects against changes to 'dp_netdevs'. */ -static struct
>>> ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
>>> +struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
>>>
>>>  /* Contains all 'struct dp_netdev's. */  static struct shash
>>> dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex) @@ -1050,6 +1051,97 @@
>>> dpif_netdev_impl_set(struct unixctl_conn *conn, int argc OVS_UNUSED,
>>>      ds_destroy(&reply);
>>>  }
>>>
>>> +static void
>>> +dpif_miniflow_extract_impl_get(struct unixctl_conn *conn, int argc
>> OVS_UNUSED,
>>> +                               const char *argv[] OVS_UNUSED,
>>> +                               void *aux OVS_UNUSED) {
>>> +    struct ds reply = DS_EMPTY_INITIALIZER;
>>> +    struct shash_node *node;
>>> +
>>> +    ovs_mutex_lock(&dp_netdev_mutex);
>>> +    SHASH_FOR_EACH (node, &dp_netdevs) {
>>> +        struct dp_netdev_pmd_thread **pmd_list;
>>> +        struct dp_netdev *dp = node->data;
>>> +        size_t n;
>>> +
>>> +        /* Get PMD threads list, required to get the DPIF impl used by each PMD
>>> +         * thread. */
>>> +        sorted_poll_thread_list(dp, &pmd_list, &n);
>>> +        dp_mfex_impl_get(&reply, pmd_list, n);
>>> +    }
>>> +    ovs_mutex_unlock(&dp_netdev_mutex);
>>> +    unixctl_command_reply(conn, ds_cstr(&reply));
>>> +    ds_destroy(&reply);
>>> +}
>>> +
>>> +static void
>>> +dpif_miniflow_extract_impl_set(struct unixctl_conn *conn, int argc
>> OVS_UNUSED,
>>> +                               const char *argv[], void *aux
>>> +OVS_UNUSED) {
>>> +    /* This function requires just one parameter, the miniflow name.
>>> +     */
>>> +    const char *mfex_name = argv[1];
>>> +    struct shash_node *node;
>>> +
>>> +    int err = dp_mfex_impl_set_default_by_name(mfex_name);
>>> +
>>> +    if (err) {
>>> +        ovs_mutex_unlock(&dp_netdev_mutex);
>>
>> You do not hold the lock :(
>>
>
> Yea already saw that and fixed it  😊
>
> <Snip>
>
> Br
> Amber



More information about the dev mailing list