powerpc: memcpy optimization for 64bit LE
author    Philippe Bergheaud <felix@linux.vnet.ibm.com>
          Tue, 29 Apr 2014 23:12:01 +0000 (09:12 +1000)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
          Wed, 30 Apr 2014 05:26:18 +0000 (15:26 +1000)
Unaligned stores take alignment exceptions on POWER7 running in
little-endian mode. Add a dumb little-endian base memcpy that avoids
unaligned stores. Once booted, the feature fixup code switches over to
the VMX copy loops (which are already endian safe).

The question is what we do before that switch-over. The base 64bit
memcpy takes alignment exceptions on POWER7, so we can't use it as is.
Fixing the causes of the alignment exceptions would slow it down,
because we'd need to ensure all loads and stores are aligned either
through rotate tricks or bytewise loads and stores. Either would be bad
for all other 64bit platforms.
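
For illustration only (not part of the patch), the little-endian
fallback is equivalent to this byte-at-a-time C loop; the function
name here is hypothetical:

	#include <stddef.h>

	/* Copy one byte at a time so no store is ever unaligned,
	 * trading throughput for safety until the VMX copy loops
	 * take over.  memcpy must return the destination pointer. */
	static void *memcpy_bytewise(void *dest, const void *src,
				     size_t n)
	{
		char *d = dest;
		const char *s = src;

		while (n--)
			*d++ = *s++;
		return dest;
	}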

[ I simplified the loop a bit - Anton ]

Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/include/asm/string.h
arch/powerpc/kernel/ppc_ksyms.c
arch/powerpc/lib/Makefile
arch/powerpc/lib/memcpy_64.S

index 0dffad6bcc846725a273daff18593c7b03305060..e40010abcaf134f53bbcf639bf6999b856a42a2a 100644 (file)
@@ -10,9 +10,7 @@
 #define __HAVE_ARCH_STRNCMP
 #define __HAVE_ARCH_STRCAT
 #define __HAVE_ARCH_MEMSET
-#ifdef __BIG_ENDIAN__
 #define __HAVE_ARCH_MEMCPY
-#endif
 #define __HAVE_ARCH_MEMMOVE
 #define __HAVE_ARCH_MEMCMP
 #define __HAVE_ARCH_MEMCHR
@@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *);
 extern int strncmp(const char *, const char *, __kernel_size_t);
 extern char * strcat(char *, const char *);
 extern void * memset(void *,int,__kernel_size_t);
-#ifdef __BIG_ENDIAN__
 extern void * memcpy(void *,const void *,__kernel_size_t);
-#endif
 extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
index 450850a49dced7919c3c2d349c2d70aae7cea0ad..48d17d6fca5b6e43ab1c77759f04a87f90c5aa2c 100644 (file)
@@ -155,9 +155,7 @@ EXPORT_SYMBOL(__cmpdi2);
 #endif
 long long __bswapdi2(long long);
 EXPORT_SYMBOL(__bswapdi2);
-#ifdef __BIG_ENDIAN__
 EXPORT_SYMBOL(memcpy);
-#endif
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
index 95a20e17dbff2c79557a18dba8f553a0f3a323d2..59fa2de9546d7fb0ba8ca05729ad78e75e0a9e88 100644 (file)
@@ -23,9 +23,7 @@ obj-y                 += checksum_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC64)    += checksum_wrappers_64.o
 endif
 
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
 obj-$(CONFIG_PPC64)            += memcpy_power7.o memcpy_64.o 
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)        += sstep.o ldstfp.o
 
index 72ad055168a333ed31ebd5c0c014ff00f6c7436a..dc4ba7953b9223af451dab4b7e4997fcaba8afd0 100644 (file)
        .align  7
 _GLOBAL(memcpy)
 BEGIN_FTR_SECTION
+#ifdef __LITTLE_ENDIAN__
+       cmpdi   cr7,r5,0
+#else
        std     r3,48(r1)       /* save destination pointer for return value */
+#endif
 FTR_SECTION_ELSE
 #ifndef SELFTEST
        b       memcpy_power7
 #endif
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+       /* dumb little-endian memcpy that will get replaced at runtime */
+       addi r9,r3,-1
+       addi r4,r4,-1
+       beqlr cr7
+       mtctr r5
+1:     lbzu r10,1(r4)
+       stbu r10,1(r9)
+       bdnz 1b
+       blr
+#else
        PPC_MTOCRF(0x01,r5)
        cmpldi  cr1,r5,16
        neg     r6,r3           # LS 3 bits = # bytes to 8-byte dest bdry
@@ -203,3 +218,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        stb     r0,0(r3)
 4:     ld      r3,48(r1)       /* return dest pointer */
        blr
+#endif
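
For reference, an annotated copy of the new little-endian path
(comments added here, not part of the patch):

	cmpdi	cr7,r5,0	/* set cr7 from the byte count in r5 */
	addi	r9,r3,-1	/* r9 = dest - 1 (r3 is left intact) */
	addi	r4,r4,-1	/* r4 = src - 1 */
	beqlr	cr7		/* return immediately if count == 0 */
	mtctr	r5		/* CTR = byte count */
1:	lbzu	r10,1(r4)	/* load byte from ++src (update form) */
	stbu	r10,1(r9)	/* store byte to ++dest (update form) */
	bdnz	1b		/* decrement CTR, loop while nonzero */
	blr

The update-form lbzu/stbu instructions pre-increment their base
registers, which is why both pointers are biased by -1 up front. r3 is
never modified, so it still holds the destination pointer that memcpy
must return.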