x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass operations

author Dan Williams <dan.j.williams@intel.com>

Mon, 29 May 2017 19:22:50 +0000 (12:22 -0700)

committer Dan Williams <dan.j.williams@intel.com>

Fri, 9 Jun 2017 16:09:56 +0000 (09:09 -0700)
author Dan Williams <dan.j.williams@intel.com>
Mon, 29 May 2017 19:22:50 +0000 (12:22 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Fri, 9 Jun 2017 16:09:56 +0000 (09:09 -0700)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index 4ccfacc7232ab1ace21b8466ae73e4c7d18d3fba..bb273b2f50b5ef38d41d7a23f38d17ada47df091 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,6 +54,7 @@ config X86
         select ARCH_HAS_KCOV                    if X86_64
         select ARCH_HAS_MMIO_FLUSH
         select ARCH_HAS_PMEM_API                if X86_64
+       select ARCH_HAS_UACCESS_FLUSHCACHE      if X86_64
         select ARCH_HAS_SET_MEMORY
         select ARCH_HAS_SG_CHAIN
         select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h

index 733bae07fb29151aab2ab8ef51af95d1e5d059ee..1f22bc277c455a2181551e5d304b80d0cf8d1c8f 100644 (file)
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -109,6 +109,11 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt)
         return 0;
  }
  
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
+void memcpy_flushcache(void *dst, const void *src, size_t cnt);
+#endif
+
  #endif /* __KERNEL__ */
  
  #endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h

index c5504b9a472e16ff7d514da5ffa8f16e5f6c40fb..b16f6a1d8b26708d41a5a4d2b6111b543ac0b1c2 100644 (file)
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -171,6 +171,10 @@ unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigne
  extern long __copy_user_nocache(void *dst, const void __user *src,
                                 unsigned size, int zerorest);
  
+extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
+extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+                          size_t len);
+
  static inline int
  __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
                                   unsigned size)
@@ -179,6 +183,13 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
         return __copy_user_nocache(dst, src, size, 0);
  }
  
+static inline int
+__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
+{
+       kasan_check_write(dst, size);
+       return __copy_user_flushcache(dst, src, size);
+}
+
  unsigned long
  copy_user_handle_tail(char *to, char *from, unsigned len);
  
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c

index 3b7c40a2e3e1a42013bb5436cff93c998c371bc4..f42d2fd86ca3949257b8af5102e9ae422ecb3f5e 100644 (file)
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -7,6 +7,7 @@
   */
  #include <linux/export.h>
  #include <linux/uaccess.h>
+#include <linux/highmem.h>
  
  /*
   * Zero Userspace
@@ -73,3 +74,130 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
         clac();
         return len;
  }
+
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+/**
+ * clean_cache_range - write back a cache range with CLWB
+ * @addr:      virtual start address
+ * @size:      number of bytes to write back
+ *
+ * Write back a cache range using the CLWB (cache line write back)
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
+ */
+static void clean_cache_range(void *addr, size_t size)
+{
+       u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
+       unsigned long clflush_mask = x86_clflush_size - 1;
+       void *vend = addr + size;
+       void *p;
+
+       for (p = (void *)((unsigned long)addr & ~clflush_mask);
+            p < vend; p += x86_clflush_size)
+               clwb(p);
+}
+
+long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
+{
+       unsigned long flushed, dest = (unsigned long) dst;
+       long rc = __copy_user_nocache(dst, src, size, 0);
+
+       /*
+        * __copy_user_nocache() uses non-temporal stores for the bulk
+        * of the transfer, but falls back to a cached copy when the
+        * destination or size is not naturally aligned (8-byte aligned
+        * for size >= 8, 4-byte aligned for size == 4), so those bytes
+        * must be written back manually. A short unaligned write can
+        * straddle two cache lines, hence flush all size bytes of it.
+        */
+       if (size < 8) {
+               if (!IS_ALIGNED(dest, 4) || size != 4)
+                       clean_cache_range(dst, size);
+       } else {
+               if (!IS_ALIGNED(dest, 8)) {
+                       dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+                       clean_cache_range(dst, 1);
+               }
+
+               flushed = dest - (unsigned long) dst;
+               if (size > flushed && !IS_ALIGNED(size - flushed, 8))
+                       clean_cache_range(dst + size - 1, 1);
+       }
+
+       return rc;
+}
+
+void memcpy_flushcache(void *_dst, const void *_src, size_t size)
+{
+       unsigned long dest = (unsigned long) _dst;
+       unsigned long source = (unsigned long) _src;
+
+       /* cached copy + write-back of the head so dest is 8-byte aligned */
+       if (!IS_ALIGNED(dest, 8)) {
+               size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest);
+
+               memcpy((void *) dest, (void *) source, len);
+               clean_cache_range((void *) dest, len);
+               dest += len;
+               source += len;
+               size -= len;
+               if (!size)
+                       return;
+       }
+
+       /* 4x8 movnti loop */
+       while (size >= 32) {
+               asm("movq    (%0), %%r8\n"
+                   "movq   8(%0), %%r9\n"
+                   "movq  16(%0), %%r10\n"
+                   "movq  24(%0), %%r11\n"
+                   "movnti  %%r8,   (%1)\n"
+                   "movnti  %%r9,  8(%1)\n"
+                   "movnti %%r10, 16(%1)\n"
+                   "movnti %%r11, 24(%1)\n"
+                   :: "r" (source), "r" (dest)
+                   : "memory", "r8", "r9", "r10", "r11");
+               dest += 32;
+               source += 32;
+               size -= 32;
+       }
+
+       /* 1x8 movnti loop */
+       while (size >= 8) {
+               asm("movq    (%0), %%r8\n"
+                   "movnti  %%r8,   (%1)\n"
+                   :: "r" (source), "r" (dest)
+                   : "memory", "r8");
+               dest += 8;
+               source += 8;
+               size -= 8;
+       }
+
+       /* 1x4 movnti loop */
+       while (size >= 4) {
+               asm("movl    (%0), %%r8d\n"
+                   "movnti  %%r8d,   (%1)\n"
+                   :: "r" (source), "r" (dest)
+                   : "memory", "r8");
+               dest += 4;
+               source += 4;
+               size -= 4;
+       }
+
+       /* cached copy + write-back for the remaining (< 4) tail bytes */
+       if (size) {
+               memcpy((void *) dest, (void *) source, size);
+               clean_cache_range((void *) dest, size);
+       }
+}
+EXPORT_SYMBOL_GPL(memcpy_flushcache);
+
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+               size_t len)
+{
+       char *from = kmap_atomic(page);
+
+       memcpy_flushcache(to, from + offset, len);
+       kunmap_atomic(from);
+}
+#endif
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c

index 656acb5d71660a6dfffaecc877bb46d76af08fb3..cbd5596e75627cd6ad2072a38fd6c43cfe0e50d6 100644 (file)
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1842,8 +1842,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
                 }
  
                 if (rw)
-                       memcpy_to_pmem(mmio->addr.aperture + offset,
-                                       iobuf + copied, c);
+                       memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c);
                 else {
                         if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
                                 mmio_flush_range((void __force *)
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c

index 7ceb5fa4f2a1343c0136cb56cdf887b4aeabac65..b8b9c8ca7862d1446667cac311ab2423242fbfea 100644 (file)
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -277,7 +277,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
                         rc = -EIO;
         }
  
-       memcpy_to_pmem(nsio->addr + offset, buf, size);
+       memcpy_flushcache(nsio->addr + offset, buf, size);
         nvdimm_flush(to_nd_region(ndns->dev.parent));
  
         return rc;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c

index c544d466ea51071a3c09a53544df61d8a1bae759..2f3aefe565c662954da47c461b55af2dcb26a588 100644 (file)
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -29,6 +29,7 @@
  #include <linux/pfn_t.h>
  #include <linux/slab.h>
  #include <linux/pmem.h>
+#include <linux/uio.h>
  #include <linux/dax.h>
  #include <linux/nd.h>
  #include "pmem.h"
@@ -80,7 +81,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
  {
         void *mem = kmap_atomic(page);
  
-       memcpy_to_pmem(pmem_addr, mem + off, len);
+       memcpy_flushcache(pmem_addr, mem + off, len);
         kunmap_atomic(mem);
  }
  
@@ -235,8 +236,15 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
         return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
  }
  
+static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+               void *addr, size_t bytes, struct iov_iter *i)
+{
+       return copy_from_iter_flushcache(addr, bytes, i);
+}
+
  static const struct dax_operations pmem_dax_ops = {
         .direct_access = pmem_dax_direct_access,
+       .copy_from_iter = pmem_copy_from_iter,
  };
  
  static void pmem_release_queue(void *q)
@@ -294,7 +302,8 @@ static int pmem_attach_disk(struct device *dev,
         dev_set_drvdata(dev, pmem);
         pmem->phys_addr = res->start;
         pmem->size = resource_size(res);
-       if (nvdimm_has_flush(nd_region) < 0)
+       if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE)
+                       || nvdimm_has_flush(nd_region) < 0)
                 dev_warn(dev, "unable to guarantee persistence of writes\n");
  
         if (!devm_request_mem_region(dev, res->start, resource_size(res),
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c

index b550edf2571f448df70e973ba9d6265ab894299c..985b0e11bd736ef44bfbbe17d7b345ce59e93193 100644 (file)
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1015,8 +1015,8 @@ void nvdimm_flush(struct nd_region *nd_region)
          * The first wmb() is needed to 'sfence' all previous writes
          * such that they are architecturally visible for the platform
          * buffer flush.  Note that we've already arranged for pmem
-        * writes to avoid the cache via arch_memcpy_to_pmem().  The
-        * final wmb() ensures ordering for the NVDIMM flush write.
+        * writes to avoid the cache via memcpy_flushcache().  The final
+        * wmb() ensures ordering for the NVDIMM flush write.
          */
         wmb();
         for (i = 0; i < nd_region->ndr_mappings; i++)
diff --git a/include/linux/dax.h b/include/linux/dax.h

index 5ec1f6c47716d6fe7c750456cef81cc0e321b1bc..bbe79ed90e2b15f8caf2748625fae44b33b5f928 100644 (file)
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -16,6 +16,9 @@ struct dax_operations {
          */
         long (*direct_access)(struct dax_device *, pgoff_t, long,
                         void **, pfn_t *);
+       /* copy_from_iter: dax-driver override for default copy_from_iter */
+       size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
+                       struct iov_iter *);
  };
  
  #if IS_ENABLED(CONFIG_DAX)
diff --git a/include/linux/string.h b/include/linux/string.h

index 537918f8a98eec239a8f16c7ae64cadcbc847789..7439d83eaa3365f3f67cbb70694a168554ac17b7 100644 (file)
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -122,6 +122,12 @@ static inline __must_check int memcpy_mcsafe(void *dst, const void *src,
         return 0;
  }
  #endif
+#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
+static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
+{
+       memcpy(dst, src, cnt);
+}
+#endif
  void *memchr_inv(const void *s, int c, size_t n);
  char *strreplace(char *s, char old, char new);
  
diff --git a/include/linux/uio.h b/include/linux/uio.h

index f2d36a3d30052db827fab68e9278fc1b693068ca..55cd54a0e94100cc71b4ddeb03d8b35392a32b7a 100644 (file)
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -95,6 +95,21 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
  size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
  bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
  size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+/*
+ * Note, users like pmem that depend on the stricter semantics of
+ * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
+ * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
+ * destination is flushed from the cache on return.
+ */
+size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
+#else
+static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes,
+                                      struct iov_iter *i)
+{
+       return copy_from_iter_nocache(addr, bytes, i);
+}
+#endif
  bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
  size_t iov_iter_zero(size_t bytes, struct iov_iter *);
  unsigned long iov_iter_alignment(const struct iov_iter *i);
diff --git a/lib/Kconfig b/lib/Kconfig

index 0c8b78a9ae2ef97a1e83753146959b2d7b39d16d..2d1c4b3a085c28584e3788d7e03ddc15c5797f8c 100644 (file)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -548,6 +548,9 @@ config ARCH_HAS_SG_CHAIN
  config ARCH_HAS_PMEM_API
         bool
  
+config ARCH_HAS_UACCESS_FLUSHCACHE
+       bool
+
  config ARCH_HAS_MMIO_FLUSH
         bool
  
diff --git a/lib/iov_iter.c b/lib/iov_iter.c

index f835964c9485f147699609bf2c2dc1ffecb8833b..c9a69064462f8c0bf3b1a7b58623c14e7b0d371f 100644 (file)
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -615,6 +615,28 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
  }
  EXPORT_SYMBOL(copy_from_iter_nocache);
  
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
+{
+       char *to = addr;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
+       iterate_and_advance(i, bytes, v,
+               __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
+                                        v.iov_base, v.iov_len),
+               memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
+                                v.bv_offset, v.bv_len),
+               memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
+                       v.iov_len)
+       )
+
+       return bytes;
+}
+EXPORT_SYMBOL_GPL(copy_from_iter_flushcache);
+#endif
+
  bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
  {
         char *to = addr;
author	Dan Williams <dan.j.williams@intel.com>
	Mon, 29 May 2017 19:22:50 +0000 (12:22 -0700)
committer	Dan Williams <dan.j.williams@intel.com>
	Fri, 9 Jun 2017 16:09:56 +0000 (09:09 -0700)
arch/x86/Kconfig		patch \| blob \| blame \| history
arch/x86/include/asm/string_64.h		patch \| blob \| blame \| history
arch/x86/include/asm/uaccess_64.h		patch \| blob \| blame \| history
arch/x86/lib/usercopy_64.c		patch \| blob \| blame \| history
drivers/acpi/nfit/core.c		patch \| blob \| blame \| history
drivers/nvdimm/claim.c		patch \| blob \| blame \| history
drivers/nvdimm/pmem.c		patch \| blob \| blame \| history
drivers/nvdimm/region_devs.c		patch \| blob \| blame \| history
include/linux/dax.h		patch \| blob \| blame \| history
include/linux/string.h		patch \| blob \| blame \| history
include/linux/uio.h		patch \| blob \| blame \| history
lib/Kconfig		patch \| blob \| blame \| history
lib/iov_iter.c		patch \| blob \| blame \| history