struct inet_frags {
struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
- /* This rwlock is a global lock (seperate per IPv4, IPv6 and
- * netfilter). Important to keep this on a seperate cacheline.
- * Its primarily a rebuild protection rwlock.
- */
- rwlock_t lock ____cacheline_aligned_in_smp;
struct work_struct frags_work;
unsigned int next_bucket;
/* The first call to hashfn is responsible to initialize
* rnd. This is best done with net_get_random_once.
+ *
+ * rnd_seqlock is used to let hash insertion detect
+ * when it needs to re-lookup the hash chain to use.
*/
u32 rnd;
+ seqlock_t rnd_seqlock;
int qsize;
unsigned int (*hashfn)(const struct inet_frag_queue *);
void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key, unsigned int hash)
- __releases(&f->lock);
+ struct inet_frags *f, void *key, unsigned int hash);
+
void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
const char *prefix);
{
int i;
- /* Per bucket lock NOT needed here, due to write lock protection */
- write_lock_bh(&f->lock);
+ write_seqlock_bh(&f->rnd_seqlock);
if (!inet_frag_may_rebuild(f))
goto out;
struct hlist_node *n;
hb = &f->hash[i];
+ spin_lock(&hb->chain_lock);
+
hlist_for_each_entry_safe(q, n, &hb->chain, list) {
unsigned int hval = inet_frag_hashfn(f, q);
/* Relink to new hash chain. */
hb_dest = &f->hash[hval];
+
+ /* This is the only place where we take
+ * another chain_lock while already holding
+ * one. As this will not run concurrently,
+ * we cannot deadlock on hb_dest lock below: if it is
+ * already locked it will be released soon, since
+ * the other caller cannot be waiting for the hb lock
+ * that we've taken above.
+ */
+ spin_lock_nested(&hb_dest->chain_lock,
+ SINGLE_DEPTH_NESTING);
hlist_add_head(&q->list, &hb_dest->chain);
+ spin_unlock(&hb_dest->chain_lock);
}
}
+ spin_unlock(&hb->chain_lock);
}
f->rebuild = false;
f->last_rebuild_jiffies = jiffies;
out:
- write_unlock_bh(&f->lock);
+ write_sequnlock_bh(&f->rnd_seqlock);
}
static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
- read_lock_bh(&f->lock);
+ local_bh_disable();
for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
evicted += inet_evict_bucket(f, &f->hash[i]);
f->next_bucket = i;
- read_unlock_bh(&f->lock);
+ local_bh_enable();
+
if (f->rebuild && inet_frag_may_rebuild(f))
inet_frag_secret_rebuild(f);
}
spin_lock_init(&hb->chain_lock);
INIT_HLIST_HEAD(&hb->chain);
}
- rwlock_init(&f->lock);
+
+ seqlock_init(&f->rnd_seqlock);
f->last_rebuild_jiffies = 0;
}
EXPORT_SYMBOL(inet_frags_init);
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
+ unsigned int seq; /* rnd_seqlock sequence sampled before each eviction sweep */
int i;
nf->low_thresh = 0;
+ local_bh_disable(); /* BHs were previously disabled by read_lock_bh(&f->lock) */
- read_lock_bh(&f->lock);
+evict_again:
+ seq = read_seqbegin(&f->rnd_seqlock);
for (i = 0; i < INETFRAGS_HASHSZ ; i++)
inet_evict_bucket(f, &f->hash[i]);
- read_unlock_bh(&f->lock);
+ if (read_seqretry(&f->rnd_seqlock, seq)) /* rnd_seqlock was written during the sweep: evict all buckets again */
+ goto evict_again;
+
+ local_bh_enable();
percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static struct inet_frag_bucket *
+get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+__acquires(hb->chain_lock) /* returns fq's hash bucket with its chain_lock held */
{
struct inet_frag_bucket *hb;
- unsigned int hash;
+ unsigned int seq, hash;
+
+ restart: /* re-entered when rnd_seqlock changed while the bucket was being selected */
+ seq = read_seqbegin(&f->rnd_seqlock);
- read_lock(&f->lock);
hash = inet_frag_hashfn(f, fq);
hb = &f->hash[hash];
spin_lock(&hb->chain_lock);
+ if (read_seqretry(&f->rnd_seqlock, seq)) { /* hash may be stale: drop this bucket and recompute */
+ spin_unlock(&hb->chain_lock);
+ goto restart;
+ }
+
+ return hb; /* hb->chain_lock is still held here; the caller releases it */
+}
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+ struct inet_frag_bucket *hb;
+
+ hb = get_frag_bucket_locked(fq, f); /* returns with hb->chain_lock held */
hlist_del(&fq->list);
spin_unlock(&hb->chain_lock);
-
- read_unlock(&f->lock);
}
void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
struct inet_frag_queue *qp_in, struct inet_frags *f,
void *arg)
{
- struct inet_frag_bucket *hb;
+ struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
struct inet_frag_queue *qp;
- unsigned int hash;
-
- read_lock(&f->lock); /* Protects against hash rebuild */
- /*
- * While we stayed w/o the lock other CPU could update
- * the rnd seed, so we need to re-calculate the hash
- * chain. Fortunatelly the qp_in can be used to get one.
- */
- hash = inet_frag_hashfn(f, qp_in);
- hb = &f->hash[hash];
- spin_lock(&hb->chain_lock);
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
- * such entry could be created on other cpu, while we
- * released the hash bucket lock.
+ * such an entry could have been created on another CPU before
+ * we acquired the hash bucket lock.
*/
hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
qp_in->last_in |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
hlist_add_head(&qp->list, &hb->chain);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
return qp;
}
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash)
- __releases(&f->lock)
{
struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
return q;
}
depth++;
}
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
if (depth <= INETFRAGS_MAXDEPTH)
return inet_frag_create(nf, f, key);
if (inet_frag_may_rebuild(f)) {
- f->rebuild = true;
+ if (!f->rebuild)
+ f->rebuild = true;
inet_frag_schedule_worker(f);
}