ARCH_USE_BUILTIN_BSWAP will use __builtin_bswap16(), __builtin_bswap32()
and __builtin_bswap64() where available. This allows better instruction
scheduling. On pre-R2 processors it will result in 32 bit and 64 bit
swapping being performed in a call to a __bswapsi2() rsp. __bswapdi2()
functions, so we add these, too.
For a 4.2 kernel with GCC 4.9 this yields the following kernel sizes:
text data bss dec hex filename
3996071 155804 88992
4240867 40b5e3 vmlinux ip22 baseline
3985687 159900 88992
4234579 409d53 vmlinux ip22 + bswap patch
6913157 378552 251024
7542733 7317cd vmlinux ip27 baseline
6878581 378552 251024
7508157 7290bd vmlinux ip27 + bswap patch
5773777 268752 187424
6229953 5f0fc1 vmlinux malta baseline
5773401 268752 187424
6229577 5f0e49 vmlinux malta + bswap patch
Presumably the code size improvments yield better cache hit rate thus
better performance compensating for the extra function call but this
will still need to be benchmarked.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
+ select ARCH_USE_BUILTIN_BSWAP
select HAVE_CONTEXT_TRACKING
select HAVE_GENERIC_DMA_COHERENT
select HAVE_IDE
obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o
# libgcc-style stuff needed in the kernel
-obj-y += ashldi3.o ashrdi3.o cmpdi2.o lshrdi3.o ucmpdi2.o
+obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o
--- /dev/null
+#include <linux/module.h>
+
+unsigned long long __bswapdi2(unsigned long long u)
+{
+ return (((u) & 0xff00000000000000ull) >> 56) |
+ (((u) & 0x00ff000000000000ull) >> 40) |
+ (((u) & 0x0000ff0000000000ull) >> 24) |
+ (((u) & 0x000000ff00000000ull) >> 8) |
+ (((u) & 0x00000000ff000000ull) << 8) |
+ (((u) & 0x0000000000ff0000ull) << 24) |
+ (((u) & 0x000000000000ff00ull) << 40) |
+ (((u) & 0x00000000000000ffull) << 56);
+}
+
+EXPORT_SYMBOL(__bswapdi2);
--- /dev/null
+#include <linux/module.h>
+
+unsigned int __bswapsi2(unsigned int u)
+{
+ return (((u) & 0xff000000) >> 24) |
+ (((u) & 0x00ff0000) >> 8) |
+ (((u) & 0x0000ff00) << 8) |
+ (((u) & 0x000000ff) << 24);
+}
+
+EXPORT_SYMBOL(__bswapsi2);