[ovs-dev] [patch v12 8/8] Userspace datapath: Add fragmentation handling.

Ben Pfaff blp at ovn.org
Tue Feb 12 01:33:49 UTC 2019


On Sun, Feb 10, 2019 at 06:56:30PM -0800, Darrell Ball wrote:
> Fragmentation handling is added to support conntrack.
> Both v4 and v6 are supported.
> 
> After discussion with several people, I decided not to store
> configuration state in the database: this is more consistent with the
> kernel going forward, matches other conntrack configuration that will
> also not live in the database, and keeps things simple overall.
> Accordingly, fragmentation handling is enabled by default.
> 
> This patch enables fragmentation tests for the userspace datapath.
> 
> Signed-off-by: Darrell Ball <dlu998 at gmail.com>

Thanks.

It's not obvious to me why ipf_reassemble_v6_frags() accounts for pad
size but ipf_reassemble_v4_frags() does not.  (Is there an inherent
difference in padding between v4 and v6?  If so, I've forgotten about
it.)
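
To make the question concrete, here is a minimal sketch of the computation I
have in mind.  It is illustrative only, not code lifted from the patch; 'pkt'
stands for some fragment's dp_packet:

    const char *tail = dp_packet_tail(pkt);
    uint8_t pad = dp_packet_l2_pad_size(pkt);   /* L2 padding, if any. */
    const char *l3 = dp_packet_l3(pkt);

    size_t l3_size = tail - l3 - pad;  /* Pad subtracted, as in the v6 path. */
    size_t naive = tail - l3;          /* Overcounts by 'pad' bytes whenever
                                        * the frame carries, e.g., Ethernet
                                        * minimum-frame padding. */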

I had some other comments, but I decided that it would be easier to
present them as an incremental patch.  Please let me know what you think
of folding in the following changes.  They are mainly stylistic one way
or another, in the sense that I don't remember any of them actually
fixing bugs.
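
One of the changes below replaces the void * handles in the ipf interface
with a typed struct ipf *.  For reference, a minimal sketch of how a caller
ends up using it (illustrative values, not taken from the patch):

    struct ipf *ipf = ipf_init();
    ipf_set_enabled(ipf, false, true);   /* Enable v4 reassembly. */
    ipf_set_max_nfrags(ipf, 1000);       /* Illustrative limit. */
    ipf_destroy(ipf);

conntrack_init() in the first hunk below does essentially the first line.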

Thanks,

Ben.

--8<--------------------------cut here-------------------------->8--

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 46b5dd570d53..6f2e788c6703 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
+ * Copyright (c) 2015, 2016, 2017, 2019 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -341,7 +341,7 @@ conntrack_init(struct conntrack *ct)
     atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT);
     latch_init(&ct->clean_thread_exit);
     ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
-    ipf_init(&ct->ipf);
+    ct->ipf = ipf_init();
 }
 
 /* Destroys the connection tracker 'ct' and frees all the allocated memory. */
diff --git a/lib/ipf.c b/lib/ipf.c
index bdfd85579165..8dc63ea99529 100644
--- a/lib/ipf.c
+++ b/lib/ipf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Nicira, Inc.
+ * Copyright (c) 2018, 2019 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -84,10 +84,8 @@ enum ipf_counter_type {
 };
 
 union ipf_addr {
-    ovs_16aligned_be32 ipv4;
-    union ovs_16aligned_in6_addr ipv6;
-    ovs_be32 ipv4_aligned;
-    struct in6_addr ipv6_aligned;
+    ovs_be32 ipv4;
+    struct in6_addr ipv6;
 };
 
 /* Represents a single fragment; part of a list of fragments. */
@@ -101,6 +99,8 @@ struct ipf_frag {
 /* The key for a collection of fragments potentially making up an unfragmented
  * packet. */
 struct ipf_list_key {
+    /* ipf_list_key_hash() requires 'src_addr' and 'dst_addr' to be the first
+     * two members. */
     union ipf_addr src_addr;
     union ipf_addr dst_addr;
     uint32_t recirc_id;
@@ -112,8 +112,9 @@ struct ipf_list_key {
 
 /* A collection of fragments potentially making up an unfragmented packet. */
 struct ipf_list {
-    struct hmap_node node;
-    struct ovs_list list_node;
+    struct hmap_node node;         /* In struct ipf's 'frag_lists'. */
+    struct ovs_list list_node;     /* In struct ipf's 'frag_exp_list' or
+                                    * 'frag_complete_list'. */
     struct ipf_frag *frag_list;    /* List of fragments for this list. */
     struct ipf_list_key key;       /* The key for the fragemnt list. */
     struct dp_packet *reass_execute_ctx; /* Reassembled packet. */
@@ -127,15 +128,11 @@ struct ipf_list {
 /* Represents a reassambled packet which typically is passed through
  * conntrack. */
 struct reassembled_pkt {
-    struct ovs_list rp_list_node;
+    struct ovs_list rp_list_node; /* In struct ipf's 'reassembled_pkt_list'. */
     struct dp_packet *pkt;
     struct ipf_list *list;
 };
 
-struct OVS_LOCKABLE ipf_lock {
-    struct ovs_mutex lock;
-};
-
 struct ipf {
     /* The clean thread is used to clean up fragments in the 'ipf'
      * module if packet batches are not longer be sent through its user. */
@@ -144,11 +141,12 @@ struct ipf {
 
     int max_v4_frag_list_size;
 
-    /* Adaptive mutex protecting the following frag_list hmap andlists. */
-    struct ipf_lock ipf_lock;
+    struct ovs_mutex ipf_lock; /* Protects all of the following. */
+    /* These contain "struct ipf_list"s. */
     struct hmap frag_lists OVS_GUARDED;
     struct ovs_list frag_exp_list OVS_GUARDED;
     struct ovs_list frag_complete_list OVS_GUARDED;
+    /* Contains "struct reassembled_pkt"s. */
     struct ovs_list reassembled_pkt_list OVS_GUARDED;
 
     /* Used to allow disabling fragmentation reassembly. */
@@ -171,19 +169,16 @@ struct ipf {
     atomic_uint64_t n6frag_cnt[IPF_NFRAGS_NUM_CNTS];
 };
 
-#define IPF_PTR(POINTER)     \
-    CONST_CAST(struct ipf *, POINTER)
-
 static void
-ipf_print_reass_packet(char *es, void *pkt)
+ipf_print_reass_packet(const char *es, const void *pkt)
 {
-    const unsigned char *b = (const unsigned char *) pkt;
-    struct ds ds = DS_EMPTY_INITIALIZER;
-    ds_put_format(&ds, "%s 128 bytes from specified header:\n", es);
-    ds_put_hex_dump(&ds, b, 128, 0, false);
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
-    VLOG_WARN_RL(&rl, "%s", ds_cstr(&ds));
-    ds_destroy(&ds);
+    if (!VLOG_DROP_WARN(&rl)) {
+        struct ds ds = DS_EMPTY_INITIALIZER;
+        ds_put_hex_dump(&ds, pkt, 128, 0, false);
+        VLOG_WARN("%s\n%s", es, ds_cstr(&ds));
+        ds_destroy(&ds);
+    }
 }
 
 static void
@@ -222,7 +217,7 @@ ipf_addr_hash_add(uint32_t hash, const union ipf_addr *addr)
 }
 
 /* Adds a list of fragments to the list tracking expiry of yet to be
- * completed reassembled packets, hence subject to expirty. */
+ * completed reassembled packets, hence subject to expiry. */
 static void
 ipf_expiry_list_add(struct ovs_list *frag_exp_list, struct ipf_list *ipf_list,
                     long long now)
@@ -314,12 +309,10 @@ ipf_list_key_hash(const struct ipf_list_key *key, uint32_t basis)
     hash = hsrc ^ hdst;
 
     /* Hash the rest of the key. */
-    hash = hash_words((uint32_t *) (&key->dst_addr + 1),
+    return hash_words((uint32_t *) (&key->dst_addr + 1),
                       (uint32_t *) (key + 1) -
                           (uint32_t *) (&key->dst_addr + 1),
                       hash);
-
-    return hash_finish(hash, 0);
 }
 
 static bool
@@ -392,15 +385,13 @@ static void
 ipf_sort(struct ipf_frag *frag_list, size_t last_idx)
     /* OVS_REQUIRES(ipf_lock) */
 {
-    struct ipf_frag ipf_frag;
-    for (int li = 1; li <=  last_idx; li++) {
-        ipf_frag = frag_list[li];
+    for (int li = 1; li <= last_idx; li++) {
+        struct ipf_frag ipf_frag = frag_list[li];
         int ci = li - 1;
         while (ci >= 0 &&
-               frag_list[ci].start_data_byte >
-                   ipf_frag.start_data_byte) {
+               frag_list[ci].start_data_byte > ipf_frag.start_data_byte) {
             frag_list[ci + 1] = frag_list[ci];
-            ci -= 1;
+            ci--;
         }
         frag_list[ci + 1] = ipf_frag;
     }
@@ -461,9 +452,7 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
     int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag);
     const char *tail = dp_packet_tail(pkt);
     uint8_t pad = dp_packet_l2_pad_size(pkt);
-    const char *l4 = dp_packet_l4(pkt);
     size_t l3_size = tail - (char *)l3 - pad;
-    size_t add_len;
 
     int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte -
                    frag_list[1].start_data_byte + 1;
@@ -478,14 +467,13 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
     dp_packet_prealloc_tailroom(pkt, pl + rest_len);
 
     for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
-        add_len = frag_list[i].end_data_byte -
-                          frag_list[i].start_data_byte + 1;
+        size_t add_len = frag_list[i].end_data_byte -
+                              frag_list[i].start_data_byte + 1;
         pl += add_len;
-        l4 = dp_packet_l4(frag_list[i].pkt);
+        const char *l4 = dp_packet_l4(frag_list[i].pkt);
         dp_packet_put(pkt, l4, add_len);
     }
     l3 = dp_packet_l3(pkt);
-    l4 = dp_packet_l4(pkt);
     tail = dp_packet_tail(pkt);
     pad = dp_packet_l2_pad_size(pkt);
     l3_size = tail - (char *)l3 - pad;
@@ -544,8 +532,6 @@ ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list,
     case IPF_LIST_STATE_LAST_SEEN:
         if (ff) {
             next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
-        } else if (lf) {
-            next_state = IPF_LIST_STATE_LAST_SEEN;
         } else {
             next_state = IPF_LIST_STATE_LAST_SEEN;
         }
@@ -602,7 +588,7 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
         goto invalid_pkt;
     }
 
-    if (!(IP_IS_FRAGMENT(l3->ip_frag_off))) {
+    if (!IP_IS_FRAGMENT(l3->ip_frag_off)) {
         return false;
     }
 
@@ -636,7 +622,6 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
 invalid_pkt:
     pkt->md.ct_state = CS_INVALID;
     return false;
-
 }
 
 static bool
@@ -655,8 +640,8 @@ ipf_v4_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
     memset(key, 0, sizeof *key);
     key->ip_id = be16_to_be32(l3->ip_id);
     key->dl_type = dl_type;
-    key->src_addr.ipv4 = l3->ip_src;
-    key->dst_addr.ipv4 = l3->ip_dst;
+    key->src_addr.ipv4 = get_16aligned_be32(&l3->ip_src);
+    key->dst_addr.ipv4 = get_16aligned_be32(&l3->ip_dst);
     key->nw_proto = l3->ip_proto;
     key->zone = zone;
     key->recirc_id = pkt->md.recirc_id;
@@ -725,7 +710,7 @@ ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
                    struct ipf_list_key *key, uint16_t *start_data_byte,
                    uint16_t *end_data_byte, bool *ff, bool *lf)
 {
-    const struct  ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
+    const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
     const char *l4 = dp_packet_l4(pkt);
     const char *tail = dp_packet_tail(pkt);
     uint8_t pad = dp_packet_l2_pad_size(pkt);
@@ -749,10 +734,10 @@ ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
     memset(key, 0, sizeof *key);
     key->ip_id = get_16aligned_be32(&frag_hdr->ip6f_ident);
     key->dl_type = dl_type;
-    key->src_addr.ipv6 = l3->ip6_src;
+    memcpy(&key->src_addr.ipv6, &l3->ip6_src, sizeof key->src_addr.ipv6);
     /* We are not supporting parsing of the routing header to use as the
      * dst address part of the key. */
-    key->dst_addr.ipv6 = l3->ip6_dst;
+    memcpy(&key->dst_addr.ipv6, &l3->ip6_dst, sizeof key->dst_addr.ipv6);
     key->nw_proto = 0;   /* Not used for key for V6. */
     key->zone = zone;
     key->recirc_id = pkt->md.recirc_id;
@@ -765,11 +750,11 @@ ipf_list_key_eq(const struct ipf_list_key *key1,
 {
     if (!memcmp(&key1->src_addr, &key2->src_addr, sizeof key1->src_addr) &&
         !memcmp(&key1->dst_addr, &key2->dst_addr, sizeof key1->dst_addr) &&
-        (key1->dl_type == key2->dl_type) &&
-        (key1->ip_id == key2->ip_id) &&
-        (key1->zone == key2->zone) &&
-        (key1->nw_proto == key2->nw_proto) &&
-        (key1->recirc_id == key2->recirc_id)) {
+        key1->dl_type == key2->dl_type &&
+        key1->ip_id == key2->ip_id &&
+        key1->zone == key2->zone &&
+        key1->nw_proto == key2->nw_proto &&
+        key1->recirc_id == key2->recirc_id) {
         return true;
     }
     return false;
@@ -795,10 +780,10 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx,
     /* OVS_REQUIRES(ipf_lock) */
 {
     for (int i = 0; i <= last_inuse_idx; i++) {
-        if (((start_data_byte >= frag_list[i].start_data_byte) &&
-            (start_data_byte <= frag_list[i].end_data_byte)) ||
-            ((end_data_byte >= frag_list[i].start_data_byte) &&
-             (end_data_byte <= frag_list[i].end_data_byte))) {
+        if ((start_data_byte >= frag_list[i].start_data_byte &&
+             start_data_byte <= frag_list[i].end_data_byte) ||
+            (end_data_byte >= frag_list[i].start_data_byte &&
+             end_data_byte <= frag_list[i].end_data_byte)) {
             return true;
         }
     }
@@ -827,13 +812,11 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list,
              * mempool size being too limited. We will otherwise need to
              * recommend not setting the mempool number of buffers too low
              * and also clamp the number of fragments. */
-            ipf_list->frag_list[last_inuse_idx + 1].pkt = pkt;
-            ipf_list->frag_list[last_inuse_idx + 1].start_data_byte =
-                start_data_byte;
-            ipf_list->frag_list[last_inuse_idx + 1].end_data_byte =
-                end_data_byte;
-            ipf_list->frag_list[last_inuse_idx + 1].dnsteal =
-                dnsteal;
+            struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx + 1];
+            frag->pkt = pkt;
+            frag->start_data_byte = start_data_byte;
+            frag->end_data_byte = end_data_byte;
+            frag->dnsteal = dnsteal;
             ipf_list->last_inuse_idx++;
             atomic_count_inc(&ipf->nfrag);
             ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED);
@@ -859,8 +842,8 @@ ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key,
     ipf_list->reass_execute_ctx = NULL;
     ipf_list->state = IPF_LIST_STATE_UNUSED;
     ipf_list->size = max_frag_list_size;
-    ipf_list->frag_list =
-        xzalloc(ipf_list->size * sizeof *ipf_list->frag_list);
+    ipf_list->frag_list
+        = xzalloc(ipf_list->size * sizeof *ipf_list->frag_list);
 }
 
 /* Generates a fragment list key from a well formed fragment and either starts
@@ -892,9 +875,9 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type,
         return false;
     }
 
-    unsigned int nfrag_max_;
-    atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max_);
-    if (atomic_count_get(&ipf->nfrag) >= nfrag_max_) {
+    unsigned int nfrag_max;
+    atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max);
+    if (atomic_count_get(&ipf->nfrag) >= nfrag_max) {
         return false;
     }
 
@@ -947,30 +930,27 @@ static void
 ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb,
                              ovs_be16 dl_type, uint16_t zone, long long now,
                              uint32_t hash_basis)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     const size_t pb_cnt = dp_packet_batch_size(pb);
     int pb_idx; /* Index in a packet batch. */
     struct dp_packet *pkt;
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
-
         if (OVS_UNLIKELY((dl_type == htons(ETH_TYPE_IP) &&
                           ipf_is_valid_v4_frag(ipf, pkt))
                           ||
                           (dl_type == htons(ETH_TYPE_IPV6) &&
                           ipf_is_valid_v6_frag(ipf, pkt)))) {
 
-            ovs_mutex_lock(&ipf->ipf_lock.lock);
+            ovs_mutex_lock(&ipf->ipf_lock);
             if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis,
                                  pb->do_not_steal)) {
                 dp_packet_batch_refill(pb, pkt, pb_idx);
             }
-            ovs_mutex_unlock(&ipf->ipf_lock.lock);
+            ovs_mutex_unlock(&ipf->ipf_lock);
         } else {
             dp_packet_batch_refill(pb, pkt, pb_idx);
         }
-
     }
 }
 
@@ -1017,9 +997,9 @@ ipf_purge_list_check(struct ipf *ipf, struct ipf_list *ipf_list,
         return false;
     }
 
-    struct dp_packet *pkt;
     while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
-        pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
+        struct dp_packet *pkt
+            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
         dp_packet_delete(pkt);
         atomic_count_dec(&ipf->nfrag);
         COVERAGE_INC(ipf_stuck_frag_list_purged);
@@ -1043,11 +1023,10 @@ ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list,
         return true;
     }
 
-    struct dp_packet *pkt;
     while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
-        pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
+        struct dp_packet *pkt
+            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
         if (ipf_dp_packet_batch_add(pb, pkt, true)) {
-
             ipf_list->last_sent_idx++;
             atomic_count_dec(&ipf->nfrag);
 
@@ -1074,13 +1053,12 @@ ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list,
 static void
 ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
                          long long now, bool v6)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     if (ovs_list_is_empty(&ipf->frag_complete_list)) {
         return;
     }
 
-    ovs_mutex_lock(&ipf->ipf_lock.lock);
+    ovs_mutex_lock(&ipf->ipf_lock);
     struct ipf_list *ipf_list, *next;
 
     LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_complete_list) {
@@ -1092,7 +1070,7 @@ ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
         }
     }
 
-    ovs_mutex_unlock(&ipf->ipf_lock.lock);
+    ovs_mutex_unlock(&ipf->ipf_lock);
 }
 
 /* Conservatively adds fragments associated with a expired fragment list to
@@ -1101,7 +1079,6 @@ ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
 static void
 ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
                        long long now, bool v6)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     enum {
         /* Very conservative, due to DOS probability. */
@@ -1113,12 +1090,12 @@ ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
         return;
     }
 
-    ovs_mutex_lock(&ipf->ipf_lock.lock);
+    ovs_mutex_lock(&ipf->ipf_lock);
     struct ipf_list *ipf_list, *next;
     size_t lists_removed = 0;
 
     LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_exp_list) {
-        if (!(now > ipf_list->expiration) ||
+        if (now <= ipf_list->expiration ||
             lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) {
             break;
         }
@@ -1132,20 +1109,19 @@ ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
         }
     }
 
-    ovs_mutex_unlock(&ipf->ipf_lock.lock);
+    ovs_mutex_unlock(&ipf->ipf_lock);
 }
 
 /* Adds a reassmebled packet to a packet batch to be processed by the caller.
  */
 static void
 ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
         return;
     }
 
-    ovs_mutex_lock(&ipf->ipf_lock.lock);
+    ovs_mutex_lock(&ipf->ipf_lock);
     struct reassembled_pkt *rp, *next;
 
     LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) {
@@ -1155,7 +1131,7 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
         }
     }
 
-    ovs_mutex_unlock(&ipf->ipf_lock.lock);
+    ovs_mutex_unlock(&ipf->ipf_lock);
 }
 
 /* Checks for reassembled packets post processing by conntrack and edits the
@@ -1163,13 +1139,12 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
 static void
 ipf_post_execute_reass_pkts(struct ipf *ipf,
                             struct dp_packet_batch *pb, bool v6)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
         return;
     }
 
-    ovs_mutex_lock(&ipf->ipf_lock.lock);
+    ovs_mutex_lock(&ipf->ipf_lock);
     struct reassembled_pkt *rp, *next;
 
     LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) {
@@ -1196,26 +1171,23 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
                     }
                 }
 
-                const char *tail_frag =
-                    dp_packet_tail(rp->list->frag_list[0].pkt);
-                uint8_t pad_frag =
-                    dp_packet_l2_pad_size(rp->list->frag_list[0].pkt);
+                const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
+                const char *tail_frag = dp_packet_tail(frag_0->pkt);
+                uint8_t pad_frag = dp_packet_l2_pad_size(frag_0->pkt);
 
-                void *l4_frag = dp_packet_l4(rp->list->frag_list[0].pkt);
+                void *l4_frag = dp_packet_l4(frag_0->pkt);
                 void *l4_reass = dp_packet_l4(pkt);
                 memcpy(l4_frag, l4_reass,
                        tail_frag - (char *) l4_frag - pad_frag);
 
                 if (v6) {
-                    struct  ovs_16aligned_ip6_hdr *l3_frag =
-                        dp_packet_l3(rp->list->frag_list[0].pkt);
-                    struct  ovs_16aligned_ip6_hdr *l3_reass =
-                        dp_packet_l3(pkt);
+                    struct ovs_16aligned_ip6_hdr *l3_frag =
+                        dp_packet_l3(frag_0->pkt);
+                    struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt);
                     l3_frag->ip6_src = l3_reass->ip6_src;
                     l3_frag->ip6_dst = l3_reass->ip6_dst;
                 } else {
-                    struct ip_header *l3_frag =
-                        dp_packet_l3(rp->list->frag_list[0].pkt);
+                    struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt);
                     struct ip_header *l3_reass = dp_packet_l3(pkt);
                     ovs_be32 reass_ip = get_16aligned_be32(&l3_reass->ip_src);
                     ovs_be32 frag_ip = get_16aligned_be32(&l3_frag->ip_src);
@@ -1240,19 +1212,17 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
         }
     }
 
-    ovs_mutex_unlock(&ipf->ipf_lock.lock);
+    ovs_mutex_unlock(&ipf->ipf_lock);
 }
 
 /* Extracts any fragments from the batch and reassembles them when a
  * complete packet is received.  Completed packets are attempted to
  * be added to the batch to be sent through conntrack. */
 void
-ipf_preprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
+ipf_preprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
                          long long now, ovs_be16 dl_type, uint16_t zone,
                          uint32_t hash_basis)
 {
-    struct ipf *ipf = IPF_PTR(ipf_);
-
     if (ipf_get_enabled(ipf)) {
         ipf_extract_frags_from_batch(ipf, pb, dl_type, zone, now, hash_basis);
     }
@@ -1267,11 +1237,9 @@ ipf_preprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
  * fragments are marked as invalid and also added to the batches seen
  * with low priority.  Reassembled packets are freed. */
 void
-ipf_postprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
+ipf_postprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
                           long long now, ovs_be16 dl_type)
 {
-    struct ipf *ipf = IPF_PTR(ipf_);
-
     if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) {
         bool v6 = dl_type == htons(ETH_TYPE_IPV6);
         ipf_post_execute_reass_pkts(ipf, pb, v6);
@@ -1282,9 +1250,8 @@ ipf_postprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
 
 static void *
 ipf_clean_thread_main(void *f)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
-    struct ipf *ipf = IPF_PTR(f);
+    struct ipf *ipf = f;
 
     enum {
         IPF_FRAG_LIST_CLEAN_TIMEOUT = 60000,
@@ -1297,7 +1264,7 @@ ipf_clean_thread_main(void *f)
         if (!ovs_list_is_empty(&ipf->frag_exp_list) ||
             !ovs_list_is_empty(&ipf->frag_complete_list)) {
 
-            ovs_mutex_lock(&ipf->ipf_lock.lock);
+            ovs_mutex_lock(&ipf->ipf_lock);
 
             struct ipf_list *ipf_list, *next;
             LIST_FOR_EACH_SAFE (ipf_list, next, list_node,
@@ -1314,7 +1281,7 @@ ipf_clean_thread_main(void *f)
                 }
             }
 
-            ovs_mutex_unlock(&ipf->ipf_lock.lock);
+            ovs_mutex_unlock(&ipf->ipf_lock);
         }
 
         poll_timer_wait_until(now + IPF_FRAG_LIST_CLEAN_TIMEOUT);
@@ -1325,15 +1292,13 @@ ipf_clean_thread_main(void *f)
     return NULL;
 }
 
-void
-ipf_init(void **ipf_)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
+struct ipf *
+ipf_init(void)
 {
-    *ipf_ = xzalloc(sizeof(struct ipf));
-    struct ipf *ipf = IPF_PTR(*ipf_);
+    struct ipf *ipf = xzalloc(sizeof *ipf);
 
-    ovs_mutex_init_adaptive(&ipf->ipf_lock.lock);
-    ovs_mutex_lock(&ipf->ipf_lock.lock);
+    ovs_mutex_init_adaptive(&ipf->ipf_lock);
+    ovs_mutex_lock(&ipf->ipf_lock);
     hmap_init(&ipf->frag_lists);
     ovs_list_init(&ipf->frag_exp_list);
     ovs_list_init(&ipf->frag_complete_list);
@@ -1343,44 +1308,35 @@ ipf_init(void **ipf_)
     ipf->max_v4_frag_list_size = DIV_ROUND_UP(
         IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
         ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
-    ovs_mutex_unlock(&ipf->ipf_lock.lock);
+    ovs_mutex_unlock(&ipf->ipf_lock);
     atomic_count_init(&ipf->nfrag, 0);
-    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_ACCEPTED], 0);
-    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_COMPL_SENT], 0);
-    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_EXPD_SENT], 0);
-    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_TOO_SMALL], 0);
-    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_OVERLAP], 0);
-    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_PURGED], 0);
-    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_ACCEPTED], 0);
-    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_COMPL_SENT], 0);
-    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_EXPD_SENT], 0);
-    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_TOO_SMALL], 0);
-    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_OVERLAP], 0);
-    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_PURGED], 0);
+    for (size_t i = 0; i < IPF_NFRAGS_NUM_CNTS; i++) {
+        atomic_init(&ipf->n4frag_cnt[i], 0);
+        atomic_init(&ipf->n6frag_cnt[i], 0);
+    }
     atomic_init(&ipf->nfrag_max, IPF_MAX_FRAGS_DEFAULT);
     atomic_init(&ipf->ifp_v4_enabled, true);
     atomic_init(&ipf->ifp_v6_enabled, true);
     latch_init(&ipf->ipf_clean_thread_exit);
     ipf->ipf_clean_thread = ovs_thread_create("ipf_clean",
                                          ipf_clean_thread_main, ipf);
+
+    return ipf;
 }
 
 void
-ipf_destroy(void *ipf_)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
+ipf_destroy(struct ipf *ipf)
 {
-    struct ipf *ipf = IPF_PTR(ipf_);
-
-    ovs_mutex_lock(&ipf->ipf_lock.lock);
+    ovs_mutex_lock(&ipf->ipf_lock);
     latch_set(&ipf->ipf_clean_thread_exit);
     pthread_join(ipf->ipf_clean_thread, NULL);
     latch_destroy(&ipf->ipf_clean_thread_exit);
 
     struct ipf_list *ipf_list;
     HMAP_FOR_EACH_POP (ipf_list, node, &ipf->frag_lists) {
-        struct dp_packet *pkt;
         while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
-            pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
+            struct dp_packet *pkt
+                = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
             if (!ipf_list->frag_list[ipf_list->last_sent_idx + 1].dnsteal) {
                 dp_packet_delete(pkt);
             }
@@ -1395,7 +1351,7 @@ ipf_destroy(void *ipf_)
         VLOG_WARN("ipf destroy with non-zero fragment count. ");
     }
 
-    struct reassembled_pkt * rp;
+    struct reassembled_pkt *rp;
     LIST_FOR_EACH_POP (rp, rp_list_node, &ipf->reassembled_pkt_list) {
         dp_packet_delete(rp->pkt);
         free(rp);
@@ -1405,33 +1361,29 @@ ipf_destroy(void *ipf_)
     ovs_list_poison(&ipf->frag_exp_list);
     ovs_list_poison(&ipf->frag_complete_list);
     ovs_list_poison(&ipf->reassembled_pkt_list);
-    ovs_mutex_unlock(&ipf->ipf_lock.lock);
-    ovs_mutex_destroy(&ipf->ipf_lock.lock);
+    ovs_mutex_unlock(&ipf->ipf_lock);
+    ovs_mutex_destroy(&ipf->ipf_lock);
     free(ipf);
 }
 
 int
-ipf_set_enabled(void *ipf_, bool v6, bool enable)
+ipf_set_enabled(struct ipf *ipf, bool v6, bool enable)
 {
-    struct ipf *ipf = IPF_PTR(ipf_);
     atomic_store_relaxed(v6 ? &ipf->ifp_v6_enabled : &ipf->ifp_v4_enabled,
                          enable);
     return 0;
 }
 
 int
-ipf_set_min_frag(void *ipf_, bool v6, uint32_t value)
+ipf_set_min_frag(struct ipf *ipf, bool v6, uint32_t value)
 {
-    struct ipf *ipf = IPF_PTR(ipf_);
-
     /* If the user specifies an unreasonably large number, fragmentation
      * will not work well but it will not blow up. */
-    if ((!v6 && value < IPF_V4_FRAG_SIZE_LBOUND) ||
-        (v6 && value < IPF_V6_FRAG_SIZE_LBOUND)) {
+    if (value < (v6 ? IPF_V6_FRAG_SIZE_LBOUND : IPF_V4_FRAG_SIZE_LBOUND)) {
         return 1;
     }
 
-    ovs_mutex_lock(&ipf->ipf_lock.lock);
+    ovs_mutex_lock(&ipf->ipf_lock);
     if (v6) {
         atomic_store_relaxed(&ipf->min_v6_frag_size, value);
     } else {
@@ -1440,15 +1392,13 @@ ipf_set_min_frag(void *ipf_, bool v6, uint32_t value)
             IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
             ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
     }
-    ovs_mutex_unlock(&ipf->ipf_lock.lock);
+    ovs_mutex_unlock(&ipf->ipf_lock);
     return 0;
 }
 
 int
-ipf_set_max_nfrags(void *ipf_, uint32_t value)
+ipf_set_max_nfrags(struct ipf *ipf, uint32_t value)
 {
-    struct ipf *ipf = IPF_PTR(ipf_);
-
     if (value > IPF_NFRAG_UBOUND) {
         return 1;
     }
@@ -1457,10 +1407,8 @@ ipf_set_max_nfrags(void *ipf_, uint32_t value)
 }
 
 int
-ipf_get_status(void *ipf_, struct ipf_status *ipf_status)
+ipf_get_status(struct ipf *ipf, struct ipf_status *ipf_status)
 {
-    struct ipf *ipf = IPF_PTR(ipf_);
-
     ipf_status->nfrag = atomic_count_get(&ipf->nfrag);
     atomic_read_relaxed(&ipf->nfrag_max, &ipf_status->nfrag_max);
 
@@ -1516,17 +1464,16 @@ ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx)
 static void
 ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds)
 {
-
     ds_put_cstr(ds, "(");
     if (ipf_list->key.dl_type == htons(ETH_TYPE_IP)) {
         ds_put_format(ds, "src="IP_FMT",dst="IP_FMT",",
-                      IP_ARGS(ipf_list->key.src_addr.ipv4_aligned),
-                      IP_ARGS(ipf_list->key.dst_addr.ipv4_aligned));
+                      IP_ARGS(ipf_list->key.src_addr.ipv4),
+                      IP_ARGS(ipf_list->key.dst_addr.ipv4));
     } else {
         ds_put_cstr(ds, "src=");
-        ipv6_format_addr(&ipf_list->key.src_addr.ipv6_aligned, ds);
+        ipv6_format_addr(&ipf_list->key.src_addr.ipv6, ds);
         ds_put_cstr(ds, ",dst=");
-        ipv6_format_addr(&ipf_list->key.dst_addr.ipv6_aligned, ds);
+        ipv6_format_addr(&ipf_list->key.dst_addr.ipv6, ds);
         ds_put_cstr(ds, ",");
     }
 
@@ -1547,25 +1494,23 @@ ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds)
  * ipf list, to which 'dump' is pointed to.  Returns EOF when there are no
  * more ipf lists. */
 int
-ipf_dump_next(void *ipf_, struct ipf_dump_ctx *ipf_dump_ctx, char **dump)
+ipf_dump_next(struct ipf *ipf, struct ipf_dump_ctx *ipf_dump_ctx, char **dump)
 {
-    struct ipf *ipf = IPF_PTR(ipf_);
-    ovs_mutex_lock(&ipf->ipf_lock.lock);
+    ovs_mutex_lock(&ipf->ipf_lock);
 
     struct hmap_node *node = hmap_at_position(&ipf->frag_lists,
                                               &ipf_dump_ctx->bucket_pos);
     if (!node) {
-        ovs_mutex_unlock(&ipf->ipf_lock.lock);
+        ovs_mutex_unlock(&ipf->ipf_lock);
         return EOF;
     } else {
         struct ipf_list *ipf_list_;
         INIT_CONTAINER(ipf_list_, node, node);
         struct ipf_list ipf_list = *ipf_list_;
-        ovs_mutex_unlock(&ipf->ipf_lock.lock);
+        ovs_mutex_unlock(&ipf->ipf_lock);
         struct ds ds = DS_EMPTY_INITIALIZER;
         ipf_dump_create(&ipf_list, &ds);
-        *dump = xstrdup(ds.string);
-        ds_destroy(&ds);
+        *dump = ds_steal_cstr(&ds);
         return 0;
     }
 }
diff --git a/lib/ipf.h b/lib/ipf.h
index 68e12d500701..2bce3112431b 100644
--- a/lib/ipf.h
+++ b/lib/ipf.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Nicira, Inc.
+ * Copyright (c) 2018, 2019 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,23 +38,24 @@ struct ipf_status {
    unsigned int nfrag_max;
 };
 
-void ipf_init(void **ipf_);
-void ipf_destroy(void *ipf_);
-void ipf_preprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
+struct ipf *ipf_init(void);
+void ipf_destroy(struct ipf *);
+void ipf_preprocess_conntrack(struct ipf *, struct dp_packet_batch *pb,
                               long long now, ovs_be16 dl_type, uint16_t zone,
                               uint32_t hash_basis);
 
-void ipf_postprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
+void ipf_postprocess_conntrack(struct ipf *, struct dp_packet_batch *pb,
                                long long now, ovs_be16 dl_type);
 
-int ipf_set_enabled(void *ipf_, bool v6, bool enable);
-int ipf_set_min_frag(void *ipf_, bool v6, uint32_t value);
-int ipf_set_max_nfrags(void *ipf_, uint32_t value);
-int ipf_get_status(void *ipf_, struct ipf_status *ipf_status);
+int ipf_set_enabled(struct ipf *, bool v6, bool enable);
+int ipf_set_min_frag(struct ipf *, bool v6, uint32_t value);
+int ipf_set_max_nfrags(struct ipf *, uint32_t value);
+int ipf_get_status(struct ipf *, struct ipf_status *ipf_status);
 
 struct ipf_dump_ctx;
 int ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx);
-int ipf_dump_next(void *ipf_, struct ipf_dump_ctx *ipf_dump_ctx, char **dump);
+int ipf_dump_next(struct ipf *,
+                  struct ipf_dump_ctx *ipf_dump_ctx, char **dump);
 int ipf_dump_done(struct ipf_dump_ctx *ipf_dump_ctx);
 
 #endif /* ipf.h */

