x86: Separate out entry text section
authorJiri Olsa <jolsa@redhat.com>
Mon, 7 Mar 2011 18:10:39 +0000 (19:10 +0100)
committerIngo Molnar <mingo@elte.hu>
Tue, 8 Mar 2011 16:22:11 +0000 (17:22 +0100)
Put x86 entry code into a separate link section: .entry.text.

Separating the entry text section seems to have performance
benefits - caused by more efficient instruction cache usage.

Running hackbench with perf stat --repeat showed that the change
compresses the icache footprint. The icache load miss rate went
down by about 15%:

 before patch:
         19417627  L1-icache-load-misses      ( +-   0.147% )

 after patch:
         16490788  L1-icache-load-misses      ( +-   0.180% )

The motivation of the patch was to fix a particular kprobes
bug that relates to the entry text section, the performance
advantage was discovered accidentally.

Whole perf output follows:

 - results for current tip tree:

  Performance counter stats for './hackbench/hackbench 10' (500 runs):

         19417627  L1-icache-load-misses      ( +-   0.147% )
       2676914223  instructions             #      0.497 IPC     ( +- 0.079% )
       5389516026  cycles                     ( +-   0.144% )

      0.206267711  seconds time elapsed   ( +-   0.138% )

 - results for current tip tree with the patch applied:

  Performance counter stats for './hackbench/hackbench 10' (500 runs):

         16490788  L1-icache-load-misses      ( +-   0.180% )
       2717734941  instructions             #      0.502 IPC     ( +- 0.079% )
       5414756975  cycles                     ( +-   0.148% )

      0.206747566  seconds time elapsed   ( +-   0.137% )

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: masami.hiramatsu.pt@hitachi.com
Cc: ananth@in.ibm.com
Cc: davem@davemloft.net
Cc: 2nddept-manager@sdl.hitachi.co.jp
LKML-Reference: <20110307181039.GB15197@jolsa.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/ia32/ia32entry.S
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/vmlinux.lds.S
include/asm-generic/sections.h
include/asm-generic/vmlinux.lds.h

index 518bb99c339480820fc3995b1456d29704d67f07..f729b2e9679c5130f28c5a5e6e96db79e96c1011 100644 (file)
@@ -25,6 +25,8 @@
 #define sysretl_audit ia32_ret_from_sys_call
 #endif
 
+       .section .entry.text, "ax"
+
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
 
        .macro IA32_ARG_FIXUP noebp=0
index c8b4efad7ebb080faeb56a9f6b539ef8cb6d7493..f5accf8eaa7850ad37cd052b4146e1e0cf3c688e 100644 (file)
@@ -65,6 +65,8 @@
 #define sysexit_audit  syscall_exit_work
 #endif
 
+       .section .entry.text, "ax"
+
 /*
  * We use macros for low-level operations which need to be overridden
  * for paravirtualization.  The following will never clobber any registers:
@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
  */
 .section .init.rodata,"a"
 ENTRY(interrupt)
-.text
+.section .entry.text, "ax"
        .p2align 5
        .p2align CONFIG_X86_L1_CACHE_SHIFT
 ENTRY(irq_entries_start)
@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
       .endif
       .previous
        .long 1b
-      .text
+      .section .entry.text, "ax"
 vector=vector+1
     .endif
   .endr
index aed1ffbeb0c9beaacbf0fe6bed4dc2714d12d22d..0a0ed794edb29b933d2d7d5869d614640e64f9ec 100644 (file)
@@ -61,6 +61,8 @@
 #define __AUDIT_ARCH_LE           0x40000000
 
        .code64
+       .section .entry.text, "ax"
+
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
@@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
  */
        .section .init.rodata,"a"
 ENTRY(interrupt)
-       .text
+       .section .entry.text
        .p2align 5
        .p2align CONFIG_X86_L1_CACHE_SHIFT
 ENTRY(irq_entries_start)
@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
       .endif
       .previous
        .quad 1b
-      .text
+      .section .entry.text
 vector=vector+1
     .endif
   .endr
index bf4700755184e32d4b4e549bd19f4014caa46468..6d4341d5c52a354d341b6c19fc81170b4b60a331 100644 (file)
@@ -105,6 +105,7 @@ SECTIONS
                SCHED_TEXT
                LOCK_TEXT
                KPROBES_TEXT
+               ENTRY_TEXT
                IRQENTRY_TEXT
                *(.fixup)
                *(.gnu.warning)
index b3bfabc258f369b576d3660c41955521dbeb21ac..c1a1216e29ced8ad92dac7c089991e7bf97bd2c3 100644 (file)
@@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[];
 extern char _end[];
 extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
 extern char __kprobes_text_start[], __kprobes_text_end[];
+extern char __entry_text_start[], __entry_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __start_rodata[], __end_rodata[];
 
index fe77e3395b40b0544ff9f939524ead2cb2263aba..906c3ceca9a2b245eeb8456e035f77a75b104af4 100644 (file)
                *(.kprobes.text)                                        \
                VMLINUX_SYMBOL(__kprobes_text_end) = .;
 
+#define ENTRY_TEXT                                                     \
+               ALIGN_FUNCTION();                                       \
+               VMLINUX_SYMBOL(__entry_text_start) = .;                 \
+               *(.entry.text)                                          \
+               VMLINUX_SYMBOL(__entry_text_end) = .;
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 #define IRQENTRY_TEXT                                                  \
                ALIGN_FUNCTION();                                       \