x86, mem: Don't implement forward memmove() as memcpy()
author Ma, Ling <ling.ma@intel.com>
Mon, 23 Aug 2010 21:11:12 +0000 (14:11 -0700)
committer H. Peter Anvin <hpa@linux.intel.com>
Mon, 23 Aug 2010 21:14:27 +0000 (14:14 -0700)
memmove() allows the source and destination regions to overlap, but
memcpy() is not required to handle overlap.  Therefore, explicitly
implement memmove() in both the forward and backward directions, to
give us the ability to optimize memcpy().

Signed-off-by: Ma Ling <ling.ma@intel.com>
LKML-Reference: <C10D3FB0CD45994C8A51FEC1227CE22F0E483AD86A@shsmsx502.ccr.corp.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/lib/memcpy_32.c
arch/x86/lib/memmove_64.c

index 5415a9d06f53b75c4a993b0bbe96508502691142..be424dfcf3654ab76da38d486fceb0c17dad0684 100644 (file)
@@ -25,19 +25,35 @@ void *memmove(void *dest, const void *src, size_t n)
        int d0, d1, d2;
 
        if (dest < src) {
-               memcpy(dest, src, n);
+               if ((dest + n) < src)
+                        return memcpy(dest, src, n);
+               else
+                       __asm__ __volatile__(
+                               "rep\n\t"
+                               "movsb\n\t"
+                               : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+                               :"0" (n),
+                                "1" (src),
+                                "2" (dest)
+                               :"memory");
+
        } else {
-               __asm__ __volatile__(
-                       "std\n\t"
-                       "rep\n\t"
-                       "movsb\n\t"
-                       "cld"
-                       : "=&c" (d0), "=&S" (d1), "=&D" (d2)
-                       :"0" (n),
-                        "1" (n-1+src),
-                        "2" (n-1+dest)
-                       :"memory");
+
+               if((src + count) < dest)
+                       return memcpy(dest, src, count);
+               else
+                       __asm__ __volatile__(
+                               "std\n\t"
+                               "rep\n\t"
+                               "movsb\n\t"
+                               "cld"
+                               : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+                               :"0" (n),
+                                "1" (n-1+src),
+                                "2" (n-1+dest)
+                               :"memory");
        }
+
        return dest;
 }
 EXPORT_SYMBOL(memmove);
index 0a33909bf12213dbb0945d057e5c7c537296074e..ecacc4b3d9e5056785daea45742db183f283e065 100644 (file)
@@ -8,13 +8,49 @@
 #undef memmove
 void *memmove(void *dest, const void *src, size_t count) /* copy count bytes from src to dest, picking the copy direction from the pointer order; returns dest (or memcpy()'s result) */
 {
+       unsigned long d0, d1, d2, d3; /* asm output operands only; never read after the asm statements */
        if (dest < src) { /* dest starts below src: copy toward higher addresses */
-               return memcpy(dest, src, count);
+               if ((dest + count) < src) /* end of dest is strictly below src, so the regions are disjoint */
+                        return memcpy(dest, src, count); /* disjoint case is handed to memcpy() */
+               else
+                       __asm__ __volatile__( /* no std/cld here: assumes the direction flag is already clear */
+                               "movq %0, %3\n\t" /* d3 = count */
+                               "shr $3, %0\n\t" /* rcx = count / 8 = number of 8-byte words */
+                               "andq $7, %3\n\t" /* d3 = count % 8 = leftover bytes */
+                               "rep\n\t" /* rep + movsq: copy rcx 8-byte words from (rsi) to (rdi) */
+                               "movsq\n\t"
+                               "movq %3, %0\n\t" /* rcx = leftover byte count */
+                               "rep\n\t" /* rep + movsb: copy the leftover bytes */
+                               "movsb"
+                               : "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3) /* operands 0-2 pinned to rcx/rsi/rdi; d3 is any general register */
+                               :"0" (count), /* count enters in rcx */
+                                "1" (src), /* src enters in rsi */
+                                "2" (dest) /* dest enters in rdi */
+                               :"memory"); /* the asm writes memory the compiler cannot track */
        } else { /* dest >= src: copy toward lower addresses */
-               char *p = dest + count;
-               const char *s = src + count;
-               while (count--)
-                       *--p = *--s;
+               if((src + count) < dest) /* end of src is strictly below dest, so the regions are disjoint */
+                       return memcpy(dest, src, count); /* disjoint case is handed to memcpy() */
+               else
+                       __asm__ __volatile__(
+                               "movq %0, %3\n\t" /* d3 = count */
+                               "lea -8(%1, %0), %1\n\t" /* rsi = src + count - 8: last 8-byte word of the source */
+                               "lea -8(%2, %0), %2\n\t" /* rdi = dest + count - 8: last 8-byte word of the destination */
+                               "shr $3, %0\n\t" /* rcx = count / 8 = number of 8-byte words */
+                               "andq $7, %3\n\t" /* d3 = count % 8 = leftover bytes */
+                               "std\n\t" /* set the direction flag so string ops step downward */
+                               "rep\n\t" /* rep + movsq: copy the words, highest address first */
+                               "movsq\n\t"
+                               "lea 7(%1), %1\n\t" /* rsi is one word below the last word copied; +7 is the highest uncopied byte */
+                               "lea 7(%2), %2\n\t" /* same adjustment for rdi */
+                               "movq %3, %0\n\t" /* rcx = leftover byte count */
+                               "rep\n\t" /* rep + movsb: copy the leftover low bytes, still descending */
+                               "movsb\n\t"
+                               "cld" /* clear the direction flag again before leaving the asm */
+                               : "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3) /* operands 0-2 pinned to rcx/rsi/rdi; d3 is any general register */
+                               :"0" (count), /* count enters in rcx */
+                                "1" (src), /* src enters in rsi */
+                                "2" (dest) /* dest enters in rdi */
+                               :"memory"); /* the asm writes memory the compiler cannot track */
        }
        return dest; /* reached only on the inline-asm paths */
 }