[ovs-dev] [PATCH] lib/util: Only define count_1bits_8 when needed.

Jarno Rajahalme jrajahalme at nicira.com
Wed Dec 11 23:32:38 UTC 2013


Ben,

I like this; I’ll post a new patch ASAP.

  Jarno

On Dec 11, 2013, at 11:33 AM, Ben Pfaff <blp at nicira.com> wrote:

> On Wed, Dec 11, 2013 at 10:54:00AM -0800, Jarno Rajahalme wrote:
>> util.h declares count_1bits_8 only when it is needed; make sure the
>> definition is compiled in only in that case, too.
>> 
>> Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
> 
> With this, I still get:
>        ../lib/util.c:921:15: warning: symbol 'count_1bits_8' was not declared. Should it be static?
> because the previous declaration of count_1bits_8[] is not in scope at
> the point of definition in util.c.
> 
> How about something like this?  (Tested only on a 32-bit build without
> __POPCNT__.)
> 
> diff --git a/lib/util.c b/lib/util.c
> index 13d41a7..000504c 100644
> --- a/lib/util.c
> +++ b/lib/util.c
> @@ -901,7 +901,7 @@ raw_clz64(uint64_t n)
> }
> #endif
> 
> -#if !(__GNUC__ >= 4 && defined(__corei7))
> +#if NEED_COUNT_1BITS_8
> #define INIT1(X)                                \
>     ((((X) & (1 << 0)) != 0) +                  \
>      (((X) & (1 << 1)) != 0) +                  \
> diff --git a/lib/util.h b/lib/util.h
> index 8d810c2..b158c2f 100644
> --- a/lib/util.h
> +++ b/lib/util.h
> @@ -371,49 +371,55 @@ log_2_ceil(uint64_t n)
>     return log_2_floor(n) + !is_pow2(n);
> }
> 
> -extern const uint8_t count_1bits_8[256];
> -
> -/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
> +/* unsigned int count_1bits(uint64_t x):
> + *
> + * Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
> +#if UINTPTR_MAX == UINT64_MAX
> +static inline unsigned int
> +count_1bits(uint64_t x)
> +{
> +#if __GNUC__ >= 4 && __POPCNT__
> +    return __builtin_popcountll(x);
> +#else
> +    /* This portable implementation is the fastest one we know of for 64
> +     * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
> +    const uint64_t h55 = UINT64_C(0x5555555555555555);
> +    const uint64_t h33 = UINT64_C(0x3333333333333333);
> +    const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
> +    const uint64_t h01 = UINT64_C(0x0101010101010101);
> +    x -= (x >> 1) & h55;               /* Count of each 2 bits in-place. */
> +    x = (x & h33) + ((x >> 2) & h33);  /* Count of each 4 bits in-place. */
> +    x = (x + (x >> 4)) & h0F;          /* Count of each 8 bits in-place. */
> +    return (x * h01) >> 56;            /* Sum of all bytes. */
> +#endif
> +}
> +#else  /* not 64-bit */
> +#if __GNUC__ >= 4 && __POPCNT__
> static inline unsigned int
> count_1bits_32(uint32_t x)
> {
> -#if __GNUC__ >= 4 && defined(__corei7)
> -    /* __builtin_popcount() is fast only when supported by the CPU. */
>     return __builtin_popcount(x);
> +}
> #else
> +#define NEED_COUNT_1BITS_8 1
> +extern const uint8_t count_1bits_8[256];
> +static inline unsigned int
> +count_1bits_32(uint32_t x)
> +{
>     /* This portable implementation is the fastest one we know of for 32 bits,
>      * and faster than GCC __builtin_popcount(). */
>     return (count_1bits_8[x & 0xff] +
>             count_1bits_8[(x >> 8) & 0xff] +
>             count_1bits_8[(x >> 16) & 0xff] +
>             count_1bits_8[x >> 24]);
> -#endif
> }
> -
> -/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
> +#endif
> static inline unsigned int
> count_1bits(uint64_t x)
> {
> -    if (sizeof(void *) == 8) { /* 64-bit CPU */
> -#if __GNUC__ >= 4 && defined(__corei7)
> -        /* __builtin_popcountll() is fast only when supported by the CPU. */
> -        return __builtin_popcountll(x);
> -#else
> -        /* This portable implementation is the fastest one we know of for 64
> -         * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
> -        const uint64_t h55 = UINT64_C(0x5555555555555555);
> -        const uint64_t h33 = UINT64_C(0x3333333333333333);
> -        const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
> -        const uint64_t h01 = UINT64_C(0x0101010101010101);
> -        x -= (x >> 1) & h55;               /* Count of each 2 bits in-place. */
> -        x = (x & h33) + ((x >> 2) & h33);  /* Count of each 4 bits in-place. */
> -        x = (x + (x >> 4)) & h0F;          /* Count of each 8 bits in-place. */
> -        return (x * h01) >> 56;            /* Sum of all bytes. */
> -#endif
> -    } else { /* 32-bit CPU */
> -        return count_1bits_32(x) + count_1bits_32(x >> 32);
> -    }
> +    return count_1bits_32(x) + count_1bits_32(x >> 32);
> }
> +#endif
> 
> /* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if 'x'
>  * is 0. */




More information about the dev mailing list