powerpc/kvm: Handle transparent hugepage in KVM
author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Thu, 20 Jun 2013 09:00:19 +0000 (14:30 +0530)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Fri, 21 Jun 2013 06:01:55 +0000 (16:01 +1000)
We can find PTEs that are splitting while walking the page tables. Return
a none PTE in that case.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c

index 9c1ff330c8053563545b9a0e7cfc72e62798365e..a1ecb14e4442d564fac6208e4bc445825beb0ff6 100644 (file)
@@ -159,36 +159,46 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 }
 
 /*
- * Lock and read a linux PTE.  If it's present and writable, atomically
- * set dirty and referenced bits and return the PTE, otherwise return 0.
+ * If it's present and writable, atomically set dirty and referenced bits and
+ * return the PTE, otherwise return 0. If we find a transparent hugepage
+ * and if it is marked splitting we return 0;
  */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
+                                                unsigned int hugepage)
 {
-       pte_t pte, tmp;
-
-       /* wait until _PAGE_BUSY is clear then set it atomically */
-       __asm__ __volatile__ (
-               "1:     ldarx   %0,0,%3\n"
-               "       andi.   %1,%0,%4\n"
-               "       bne-    1b\n"
-               "       ori     %1,%0,%4\n"
-               "       stdcx.  %1,0,%3\n"
-               "       bne-    1b"
-               : "=&r" (pte), "=&r" (tmp), "=m" (*p)
-               : "r" (p), "i" (_PAGE_BUSY)
-               : "cc");
-
-       if (pte_present(pte)) {
-               pte = pte_mkyoung(pte);
-               if (writing && pte_write(pte))
-                       pte = pte_mkdirty(pte);
-       }
+       pte_t old_pte, new_pte = __pte(0);
+
+       while (1) {
+               old_pte = pte_val(*ptep);
+               /*
+                * wait until _PAGE_BUSY is clear then set it atomically
+                */
+               if (unlikely(old_pte & _PAGE_BUSY)) {
+                       cpu_relax();
+                       continue;
+               }
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+               /* If hugepage and is trans splitting return None */
+               if (unlikely(hugepage &&
+                            pmd_trans_splitting(pte_pmd(old_pte))))
+                       return __pte(0);
+#endif
+               /* If pte is not present return None */
+               if (unlikely(!(old_pte & _PAGE_PRESENT)))
+                       return __pte(0);
 
-       *p = pte;       /* clears _PAGE_BUSY */
+               new_pte = pte_mkyoung(old_pte);
+               if (writing && pte_write(old_pte))
+                       new_pte = pte_mkdirty(new_pte);
 
-       return pte;
+               if (old_pte == __cmpxchg_u64((unsigned long *)ptep, old_pte,
+                                            new_pte))
+                       break;
+       }
+       return new_pte;
 }
 
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
index 5880dfb31074895816af634620e736cd51985bc2..710d31317d812efe73086e33a89bcdced65ba349 100644 (file)
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                }
                /* if the guest wants write access, see if that is OK */
                if (!writing && hpte_is_writable(r)) {
+                       unsigned int hugepage_shift;
                        pte_t *ptep, pte;
 
                        /*
@@ -683,9 +684,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         */
                        rcu_read_lock_sched();
                        ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-                                                        hva, NULL);
-                       if (ptep && pte_present(*ptep)) {
-                               pte = kvmppc_read_update_linux_pte(ptep, 1);
+                                                        hva, &hugepage_shift);
+                       if (ptep) {
+                               pte = kvmppc_read_update_linux_pte(ptep, 1,
+                                                          hugepage_shift);
                                if (pte_write(pte))
                                        write_ok = 1;
                        }
index dcf892d25a56b6cc05dbf2de8d8311feda876b3c..fc25689a9f35076e61d83ca024a08e2bdf7564c1 100644 (file)
@@ -139,20 +139,18 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
 {
        pte_t *ptep;
        unsigned long ps = *pte_sizep;
-       unsigned int shift;
+       unsigned int hugepage_shift;
 
-       ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+       ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
        if (!ptep)
                return __pte(0);
-       if (shift)
-               *pte_sizep = 1ul << shift;
+       if (hugepage_shift)
+               *pte_sizep = 1ul << hugepage_shift;
        else
                *pte_sizep = PAGE_SIZE;
        if (ps > *pte_sizep)
                return __pte(0);
-       if (!pte_present(*ptep))
-               return __pte(0);
-       return kvmppc_read_update_linux_pte(ptep, writing);
+       return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
 }
 
 static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)