xfs: lockless per-ag lookups
author: Dave Chinner <dchinner@redhat.com>
Wed, 22 Sep 2010 00:47:20 +0000 (10:47 +1000)
committer: Alex Elder <aelder@sgi.com>
Mon, 18 Oct 2010 20:07:44 +0000 (15:07 -0500)
When we start taking a reference to the per-ag for every cached
buffer in the system, kernel lockstat profiling on an 8-way create
workload shows the mp->m_perag_lock has higher acquisition rates
than the inode lock and has significantly more contention. That is,
it becomes the highest contended lock in the system.

The perag lookup is trivial to convert to lock-less RCU lookups
because perag structures never go away. Hence the only thing we need
to protect against is tree structure changes during a grow. This can
be done simply by replacing the locking in xfs_perag_get() with RCU
read locking. This removes the mp->m_perag_lock completely from this
path.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
fs/xfs/linux-2.6/xfs_sync.c
fs/xfs/xfs_ag.h
fs/xfs/xfs_mount.c

index 81976ffed7d6f031f1bef0d7a5995cbbebbed69b..3a1d229b4784fa1489f0cca54fb0e1bd16c63467 100644 (file)
@@ -150,17 +150,17 @@ xfs_inode_ag_iter_next_pag(
                int found;
                int ref;
 
-               spin_lock(&mp->m_perag_lock);
+               rcu_read_lock();
                found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
                                (void **)&pag, *first, 1, tag);
                if (found <= 0) {
-                       spin_unlock(&mp->m_perag_lock);
+                       rcu_read_unlock();
                        return NULL;
                }
                *first = pag->pag_agno + 1;
                /* open coded pag reference increment */
                ref = atomic_inc_return(&pag->pag_ref);
-               spin_unlock(&mp->m_perag_lock);
+               rcu_read_unlock();
                trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
        } else {
                pag = xfs_perag_get(mp, *first);
index 4917d4eed4edaeac56edffc1d65e4b7bc92acaa7..51c42c202bf11106228cf3592e71c0cd78fa5ee0 100644 (file)
@@ -230,6 +230,9 @@ typedef struct xfs_perag {
        rwlock_t        pag_ici_lock;   /* incore inode lock */
        struct radix_tree_root pag_ici_root;    /* incore inode cache root */
        int             pag_ici_reclaimable;    /* reclaimable inodes */
+
+       /* for rcu-safe freeing */
+       struct rcu_head rcu_head;
 #endif
        int             pagb_count;     /* pagb slots in use */
 } xfs_perag_t;
index 00c7a876807df0f3c384fe1316f914572f9ee937..14fc6e9e1816c0657a3fb8a700e0d36ab415289a 100644 (file)
@@ -199,6 +199,8 @@ xfs_uuid_unmount(
 
 /*
  * Reference counting access wrappers to the perag structures.
+ * Because we never free per-ag structures, the only thing we
+ * have to protect against changes is the tree structure itself.
  */
 struct xfs_perag *
 xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -206,13 +208,13 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
        struct xfs_perag        *pag;
        int                     ref = 0;
 
-       spin_lock(&mp->m_perag_lock);
+       rcu_read_lock();
        pag = radix_tree_lookup(&mp->m_perag_tree, agno);
        if (pag) {
                ASSERT(atomic_read(&pag->pag_ref) >= 0);
                ref = atomic_inc_return(&pag->pag_ref);
        }
-       spin_unlock(&mp->m_perag_lock);
+       rcu_read_unlock();
        trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
        return pag;
 }
@@ -227,10 +229,18 @@ xfs_perag_put(struct xfs_perag *pag)
        trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
 }
 
+STATIC void
+__xfs_free_perag(
+       struct rcu_head *head)
+{
+       struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+
+       ASSERT(atomic_read(&pag->pag_ref) == 0);
+       kmem_free(pag);
+}
+
 /*
- * Free up the resources associated with a mount structure.  Assume that
- * the structure was initially zeroed, so we can tell which fields got
- * initialized.
+ * Free up the per-ag resources associated with the mount structure.
  */
 STATIC void
 xfs_free_perag(
@@ -242,10 +252,9 @@ xfs_free_perag(
        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
                spin_lock(&mp->m_perag_lock);
                pag = radix_tree_delete(&mp->m_perag_tree, agno);
-               ASSERT(pag);
-               ASSERT(atomic_read(&pag->pag_ref) == 0);
                spin_unlock(&mp->m_perag_lock);
-               kmem_free(pag);
+               ASSERT(pag);
+               call_rcu(&pag->rcu_head, __xfs_free_perag);
        }
 }