x86-64: Handle byte-wise tail copying in memcpy() without a loop
authorJan Beulich <JBeulich@suse.com>
Thu, 26 Jan 2012 15:55:32 +0000 (15:55 +0000)
committerIngo Molnar <mingo@elte.hu>
Thu, 26 Jan 2012 20:19:20 +0000 (21:19 +0100)
While the effect is hard to measure, reducing the number of
possibly/likely mis-predicted branches can generally be expected
to make the code perform slightly better.

Contrary to what one might expect at first glance, this also
doesn't grow the function size (the alignment gap to the next
function just gets smaller).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F218584020000780006F422@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/lib/memcpy_64.S

index 1235b04a9a60c5ef8eb9fe2213945fa4374085d6..1c273be7c97eded64736d60ba10c22ed09f3ceef 100644 (file)
@@ -164,18 +164,19 @@ ENTRY(memcpy)
        retq
        .p2align 4
 .Lless_3bytes:
-       cmpl $0, %edx
-       je .Lend
+       subl $1, %edx
+       jb .Lend
        /*
         * Move data from 1 bytes to 3 bytes.
         */
-.Lloop_1:
-       movb (%rsi), %r8b
-       movb %r8b, (%rdi)
-       incq %rdi
-       incq %rsi
-       decl %edx
-       jnz .Lloop_1
+       movzbl (%rsi), %ecx
+       jz .Lstore_1byte
+       movzbq 1(%rsi), %r8
+       movzbq (%rsi, %rdx), %r9
+       movb %r8b, 1(%rdi)
+       movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+       movb %cl, (%rdi)
 
 .Lend:
        retq