[ovs-dev] [PATCH v8 06/13] dp-packet: Handle multi-seg mbufs in helper funcs.

Fri Jun 22 19:04:22 UTC 2018

On 18/06/2018 12:50, Eelco Chaudron wrote:
> 
> 
> On 11 Jun 2018, at 18:21, Tiago Lam wrote:
> 
>> Most helper functions in dp-packet assume that the data held by a
>> dp_packet is contiguous, and perform operations such as pointer
>> arithmetic under that assumption. However, with the introduction of
>> multi-segment mbufs, where data is non-contiguous, such assumptions 
>> are
>> no longer possible. Some examples of Such helper functions are
>> dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
>> dp_packet_get_allocated() and dp_packet_at().
>>
>> Thus, instead of assuming contiguous data in dp_packet, they  now
>> iterate over the (non-contiguous) data in mbufs to perform their
>> calculations.
>>
>> Finally, dp_packet_use__() has also been modified to perform the
>> initialisation of the packet (and setting the source) before 
>> continuing
>> to set its size and data length, which now depends on the type of
>> packet.
>>
>> Co-authored-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
>>
>> Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
>> Signed-off-by: Tiago Lam <tiago.lam at intel.com>
>> ---
>>  lib/dp-packet.c |   4 +-
>>  lib/dp-packet.h | 242 
>> +++++++++++++++++++++++++++++++++++++++++++++-----------
>>  2 files changed, 197 insertions(+), 49 deletions(-)
>>
>> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
>> index 782e7c2..2aaeaae 100644
>> --- a/lib/dp-packet.c
>> +++ b/lib/dp-packet.c
>> @@ -41,11 +41,11 @@ static void
>>  dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
>>               enum dp_packet_source source)
>>  {
>> +    dp_packet_init__(b, allocated, source);
>> +
>>      dp_packet_set_base(b, base);
>>      dp_packet_set_data(b, base);
>>      dp_packet_set_size(b, 0);
>> -
>> -    dp_packet_init__(b, allocated, source);
>>  }
>>
>>  /* Initializes 'b' as an empty dp_packet that contains the 
>> 'allocated' bytes of
>> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>> index c301ed5..272597f 100644
>> --- a/lib/dp-packet.h
>> +++ b/lib/dp-packet.h
>> @@ -133,6 +133,10 @@ static inline void *dp_packet_at(const struct 
>> dp_packet *, size_t offset,
>>                                   size_t size);
>>  static inline void *dp_packet_at_assert(const struct dp_packet *,
>>                                          size_t offset, size_t size);
>> +#ifdef DPDK_NETDEV
>> +static inline void * dp_packet_at_offset(const struct dp_packet *b,
>> +                                         size_t offset);
>> +#endif
>>  static inline void *dp_packet_tail(const struct dp_packet *);
>>  static inline void *dp_packet_end(const struct dp_packet *);
>>
>> @@ -180,40 +184,6 @@ dp_packet_delete(struct dp_packet *b)
>>      }
>>  }
>>
>> -/* If 'b' contains at least 'offset + size' bytes of data, returns a 
>> pointer to
>> - * byte 'offset'.  Otherwise, returns a null pointer. */
>> -static inline void *
>> -dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>> -{
>> -    return offset + size <= dp_packet_size(b)
>> -           ? (char *) dp_packet_data(b) + offset
>> -           : NULL;
>> -}
>> -
>> -/* Returns a pointer to byte 'offset' in 'b', which must contain at 
>> least
>> - * 'offset + size' bytes of data. */
>> -static inline void *
>> -dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t 
>> size)
>> -{
>> -    ovs_assert(offset + size <= dp_packet_size(b));
>> -    return ((char *) dp_packet_data(b)) + offset;
>> -}
>> -
>> -/* Returns a pointer to byte following the last byte of data in use 
>> in 'b'. */
>> -static inline void *
>> -dp_packet_tail(const struct dp_packet *b)
>> -{
>> -    return (char *) dp_packet_data(b) + dp_packet_size(b);
>> -}
>> -
>> -/* Returns a pointer to byte following the last byte allocated for 
>> use (but
>> - * not necessarily in use) in 'b'. */
>> -static inline void *
>> -dp_packet_end(const struct dp_packet *b)
>> -{
>> -    return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
>> -}
>> -
>>  /* Returns the number of bytes of headroom in 'b', that is, the 
>> number of bytes
>>   * of unused space in dp_packet 'b' before the data that is in use.  
>> (Most
>>   * commonly, the data in a dp_packet is at its beginning, and thus 
>> the
>> @@ -229,6 +199,14 @@ dp_packet_headroom(const struct dp_packet *b)
>>  static inline size_t
>>  dp_packet_tailroom(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (b->source == DPBUF_DPDK) {
>> +        struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, 
>> &b->mbuf);
>> +        tmbuf = rte_pktmbuf_lastseg(tmbuf);
>> +
> 
> What if the last two chains contain no user data, we have more 
> tailroom?!? Should we not call rte_pktmbuf_tailroom(tmbuf)?

I've covered this in my reply to the cover letter, hopefully it is more
clear now. This shouldn't be a possibility in the implementation. Also,
we do call rte_pktmbuf_tailroom(), just below.

> 
>> +        return rte_pktmbuf_tailroom(tmbuf);
>> +    }
>> +#endif
>>      return (char *) dp_packet_end(b) - (char *) dp_packet_tail(b);
>>  }
>>
>> @@ -236,8 +214,13 @@ dp_packet_tailroom(const struct dp_packet *b)
>>  static inline void
>>  dp_packet_clear(struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    dp_packet_set_size(b, 0);
>> +    rte_pktmbuf_reset(&b->mbuf);
> 
> This would probably assert on none DPBUF_DPDK, do we need a “if 
> (b->source == DPBUF_DPDK)” here also?
> 

We do, I'll add it to next iteration. It only asserts when using DPDK
with `RTE_LIBRTE_MBUF_DEBUG`, but still...

>> +#else
>>      dp_packet_set_data(b, dp_packet_base(b));
>>      dp_packet_set_size(b, 0);
>> +#endif
>>  }
>>
>>  /* Removes 'size' bytes from the head end of 'b', which must contain 
>> at least
>> @@ -252,12 +235,32 @@ dp_packet_pull(struct dp_packet *b, size_t size)
>>      return data;
>>  }
>>
>> +#ifdef DPDK_NETDEV
>> +/* Similar to dp_packet_try_pull() but doesn't actually pull any 
>> code, only
>> + * returns true or false if it would be possible to actually pull any 
>> code.
>> + * Valid for dp_packets carrying mbufs only. */
>> +static inline bool
>> +dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
>> +    if (size > b->mbuf.data_len) {
>> +        return false;
>> +    }
>> +
>> +    return true;
>> +}
>> +#endif
>> +
>>  /* If 'b' has at least 'size' bytes of data, removes that many bytes 
>> from the
>>   * head end of 'b' and returns the first byte removed.  Otherwise, 
>> returns a
>>   * null pointer without modifying 'b'. */
>>  static inline void *
>>  dp_packet_try_pull(struct dp_packet *b, size_t size)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (!dp_packet_mbuf_may_pull(b, size)) {
>> +        return NULL;
>> +    }
>> +#endif
>> +
>>      return dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size
>>          ? dp_packet_pull(b, size) : NULL;
>>  }
>> @@ -311,6 +314,12 @@ dp_packet_set_l2_pad_size(struct dp_packet *b, 
>> uint8_t pad_size)
>>  static inline void *
>>  dp_packet_l2_5(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (!dp_packet_mbuf_may_pull(b, b->l2_5_ofs)) {
>> +        return NULL;
>> +    }
>> +#endif
>> +
>>      return b->l2_5_ofs != UINT16_MAX
>>             ? (char *) dp_packet_data(b) + b->l2_5_ofs
>>             : NULL;
>> @@ -327,6 +336,12 @@ dp_packet_set_l2_5(struct dp_packet *b, void 
>> *l2_5)
>>  static inline void *
>>  dp_packet_l3(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (!dp_packet_mbuf_may_pull(b, b->l3_ofs)) {
>> +        return NULL;
>> +    }
>> +#endif
>> +
> 
> Can we not use the dp_packet_at_offset() function here (and the similar 
> functions)? And include the above and below checks there?
> 

They are aimed at different purposes. dp_packet_at_offset() should walk
the chain of mbufs and return a pointer to the offset, while here we
actually want to know if the offset is within the first mbuf. If these
headers are not within the first mbuf, we don't return it as it crosses
mbufs.

>>      return b->l3_ofs != UINT16_MAX
>>             ? (char *) dp_packet_data(b) + b->l3_ofs
>>             : NULL;
>> @@ -341,6 +356,12 @@ dp_packet_set_l3(struct dp_packet *b, void *l3)
>>  static inline void *
>>  dp_packet_l4(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (!dp_packet_mbuf_may_pull(b, b->l4_ofs)) {
>> +        return NULL;
>> +    }
>> +#endif
>> +
>>      return b->l4_ofs != UINT16_MAX
>>             ? (char *) dp_packet_data(b) + b->l4_ofs
>>             : NULL;
>> @@ -355,10 +376,37 @@ dp_packet_set_l4(struct dp_packet *b, void *l4)
>>  static inline size_t
>>  dp_packet_l4_size(const struct dp_packet *b)
>>  {
>> +#ifdef DPDK_NETDEV
>> +    if (b->l4_ofs == UINT16_MAX) {
>> +        return 0;
>> +    }
>> +
>> +    struct rte_mbuf *hmbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>> +    struct rte_mbuf *tmbuf = dp_packet_tail(b);
>> +
>> +    size_t l4_size = 0;
>> +    if (hmbuf == tmbuf) {
> 
> Do not think this optimisation help much, as the dp_packet_tail(b) does 
> the same walk as in the "while (hmbuf)" clause below, so why not just 
> keet the else case?	
> 

Yeah, good point. I'll change that in the next iteration.

>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>> +    } else {
>> +        l4_size = hmbuf->data_len - b->l4_ofs;
>> +        hmbuf = hmbuf->next;
>> +
>> +        while (hmbuf) {
>> +            l4_size += tmbuf->data_len;
>> +
>> +            hmbuf = hmbuf->next;
>> +        }
>> +    }
>> +
>> +    l4_size -= dp_packet_l2_pad_size(b);
>> +
>> +    return l4_size;
>> +#else
>>      return b->l4_ofs != UINT16_MAX
>>          ? (const char *)dp_packet_tail(b) - (const char 
>> *)dp_packet_l4(b)
>>          - dp_packet_l2_pad_size(b)
>>          : 0;
>> +#endif
>>  }
>>
>>  static inline const void *
>> @@ -491,7 +539,7 @@ __packet_set_data(struct dp_packet *b, uint16_t v)
>>  static inline uint16_t
>>  dp_packet_get_allocated(const struct dp_packet *b)
>>  {
>> -    return b->mbuf.buf_len;
>> +    return b->mbuf.nb_segs * b->mbuf.buf_len;
>>  }
>>
>>  static inline void
>> @@ -499,7 +547,107 @@ dp_packet_set_allocated(struct dp_packet *b, 
>> uint16_t s)
>>  {
>>      b->mbuf.buf_len = s;
>>  }
>> +
>> +static inline void *
>> +dp_packet_at_offset(const struct dp_packet *b, size_t offset)
>> +{
>> +    if (b->source == DPBUF_DPDK) {
>> +        struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, 
>> &b->mbuf);
>> +
>> +        while (buf && offset > buf->data_len) {
>> +            offset -= buf->data_len;
>> +
>> +            buf = buf->next;
>> +        }
>> +        return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : 
>> NULL;
>> +    } else {
>> +        return (char *) dp_packet_data(b) + offset;
>> +    }
>> +}
>> +
>> +/* If 'b' contains at least 'offset + size' bytes of data, returns a 
>> pointer to
>> + * byte 'offset'.  Otherwise, returns a null pointer. */
>> +static inline void *
>> +dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
>> +{
>> +    return offset + size <= dp_packet_size(b)
>> +        ? dp_packet_at_offset(b, offset)
>> +        : NULL;
>> +}
>> +
>> +/* Returns a pointer to byte 'offset' in 'b', which must contain at 
>> least
>> + * 'offset + size' bytes of data. */
>> +static inline void *
>> +dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t 
>> size)
>> +{
>> +    ovs_assert(offset + size <= dp_packet_size(b));
>> +    return dp_packet_at_offset(b, offset);
>> +}
>> +
>> +/* Returns a pointer to byte following the last byte of data in use 
>> in 'b'. */
>> +static inline void *
>> +dp_packet_tail(const struct dp_packet *b)
>> +{
>> +    struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
>> +    if (b->source == DPBUF_DPDK) {
>> +        /* Find last segment where data ends, meaning the tail of the 
>> chained
>> +           mbufs is there */
>> +        buf = rte_pktmbuf_lastseg(buf);
>> +    }
>> +
> 
> What if the last two chains contains no user data, we should find the 
> last buffer with data.
> Looks like we assume only the last in the chain contains room, and we 
> achieve this by stripping off buffers (but we never add them again when 
> needed, so something is wrong in this implementation).
> 

Does my reply to the cover letter help here as well? That's a valid
point, that if the size is decreased and mbufs are freed then mbufs are
not allocated again if size needs to be incremented. But as explained,
this is to enforce that data is contiguous and so the tail points to the
last mbuf. I haven't yet encountered cases where this would lead to
complications, but maybe you have something in mind?

Tiago.