arm64: Use PoU cache instr for I/D coherency
authorAshok Kumar <ashoks@broadcom.com>
Thu, 17 Dec 2015 09:38:32 +0000 (01:38 -0800)
committerWill Deacon <will.deacon@arm.com>
Thu, 17 Dec 2015 11:07:13 +0000 (11:07 +0000)
In systems with three levels of cache(PoU at L1 and PoC at L3),
PoC cache flush instructions flushes L2 and L3 caches which could affect
performance.
For cache flushes for I and D coherency, PoU should suffice.
So changing all I and D coherency related cache flushes to PoU.

Introduced a new __clean_dcache_area_pou API for dcache flush till PoU
and provided a common macro for __flush_dcache_area and
__clean_dcache_area_pou.

Also, now in __sync_icache_dcache, icache invalidation for non-aliasing
VIPT icache is done only for that particular page instead of the earlier
__flush_icache_all.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ashok Kumar <ashoks@broadcom.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/include/asm/cacheflush.h
arch/arm64/mm/cache.S
arch/arm64/mm/flush.c
arch/arm64/mm/proc-macros.S

index 54efedaf331fda55478d001d860d6137be5d08e8..7fc294c3bc5baab31b13b1e23753d17835ce3b27 100644 (file)
@@ -68,6 +68,7 @@
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
+extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 
 static inline void flush_cache_mm(struct mm_struct *mm)
index cfa44a6adc0ad5ec29f78228196b7e834b65df40..6df07069a0253013e254dbb1206debaa939a3526 100644 (file)
@@ -81,25 +81,31 @@ ENDPROC(__flush_cache_user_range)
 /*
  *     __flush_dcache_area(kaddr, size)
  *
- *     Ensure that the data held in the page kaddr is written back to the
- *     page in question.
+ *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     are cleaned and invalidated to the PoC.
  *
  *     - kaddr   - kernel address
  *     - size    - size in question
  */
 ENTRY(__flush_dcache_area)
-       dcache_line_size x2, x3
-       add     x1, x0, x1
-       sub     x3, x2, #1
-       bic     x0, x0, x3
-1:     dc      civac, x0                       // clean & invalidate D line / unified line
-       add     x0, x0, x2
-       cmp     x0, x1
-       b.lo    1b
-       dsb     sy
+       dcache_by_line_op civac, sy, x0, x1, x2, x3
        ret
 ENDPIPROC(__flush_dcache_area)
 
+/*
+ *     __clean_dcache_area_pou(kaddr, size)
+ *
+ *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     are cleaned to the PoU.
+ *
+ *     - kaddr   - kernel address
+ *     - size    - size in question
+ */
+ENTRY(__clean_dcache_area_pou)
+       dcache_by_line_op cvau, ish, x0, x1, x2, x3
+       ret
+ENDPROC(__clean_dcache_area_pou)
+
 /*
  *     __inval_cache_range(start, end)
  *     - start   - start address of region
index c26b804015e80c46e1380d0a1af7f8f439c55405..46649d6e6c5a5608caa84015d3ce4f09d3d47eee 100644 (file)
@@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
                __flush_icache_all();
 }
 
+static void sync_icache_aliases(void *kaddr, unsigned long len)
+{
+       unsigned long addr = (unsigned long)kaddr;
+
+       if (icache_is_aliasing()) {
+               __clean_dcache_area_pou(kaddr, len);
+               __flush_icache_all();
+       } else {
+               flush_icache_range(addr, addr + len);
+       }
+}
+
 static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
                                unsigned long uaddr, void *kaddr,
                                unsigned long len)
 {
-       if (vma->vm_flags & VM_EXEC) {
-               unsigned long addr = (unsigned long)kaddr;
-               if (icache_is_aliasing()) {
-                       __flush_dcache_area(kaddr, len);
-                       __flush_icache_all();
-               } else {
-                       flush_icache_range(addr, addr + len);
-               }
-       }
+       if (vma->vm_flags & VM_EXEC)
+               sync_icache_aliases(kaddr, len);
 }
 
 /*
@@ -74,13 +79,11 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
        if (!page_mapping(page))
                return;
 
-       if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
-               __flush_dcache_area(page_address(page),
-                               PAGE_SIZE << compound_order(page));
+       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+               sync_icache_aliases(page_address(page),
+                                   PAGE_SIZE << compound_order(page));
+       else if (icache_is_aivivt())
                __flush_icache_all();
-       } else if (icache_is_aivivt()) {
-               __flush_icache_all();
-       }
 }
 
 /*
index 4c4d93c4bf65b154fd0df25c4b8165593ff044be..146bd99a7532bcc6f53ad509a839561640eeaf3e 100644 (file)
        bfi     \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
 #endif
        .endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [kaddr, kaddr + size)
+ *
+ *     op:             operation passed to dc instruction
+ *     domain:         domain used in dsb instruciton
+ *     kaddr:          starting virtual address of the region
+ *     size:           size of the region
+ *     Corrupts:       kaddr, size, tmp1, tmp2
+ */
+       .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+       dcache_line_size \tmp1, \tmp2
+       add     \size, \kaddr, \size
+       sub     \tmp2, \tmp1, #1
+       bic     \kaddr, \kaddr, \tmp2
+9998:  dc      \op, \kaddr
+       add     \kaddr, \kaddr, \tmp1
+       cmp     \kaddr, \size
+       b.lo    9998b
+       dsb     \domain
+       .endm