This change adds the configuration option CONFIG_LTO_CLANG, and
build system support for clang's Link Time Optimization (LTO). In
preparation for LTO support for other compilers, potentially common
parts of the changes are gated behind CONFIG_LTO instead.
With -flto, instead of object files, clang produces LLVM bitcode,
which is compiled into a native object at link time, allowing the
final binary to be optimized globally. For more details, see:
https://llvm.org/docs/LinkTimeOptimization.html
While the kernel normally uses GNU ld for linking, LLVM supports LTO
only with lld or GNU gold linkers. This patch set assumes gold will
be used with the LLVMgold plug-in to perform the LTO link step. Due
to potential incompatibilities with GNU ld, this change also adds
LDFINAL_vmlinux for using a different linker for the vmlinux_link
step, and defaults to using GNU ld.
Assuming LLVMgold.so is in LD_LIBRARY_PATH and CONFIG_LTO_CLANG has
been selected, an LTO kernel can be built simply by running make
CC=clang. LTO requires clang >= 5.0 and gold from binutils >= 2.27.
Bug:
62093296
Bug:
67506682
Change-Id: Ibcd9fc7ec501b4f30b43b4877897615645f8655f
(am from https://patchwork.kernel.org/patch/
10060329/)
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
# Make variables (CC, etc...)
AS = $(CROSS_COMPILE)as
LD = $(CROSS_COMPILE)ld
+LDGOLD = $(CROSS_COMPILE)ld.gold
CC = $(CROSS_COMPILE)gcc
CPP = $(CC) -E
AR = $(CROSS_COMPILE)ar
CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,)
export CFLAGS_GCOV CFLAGS_KCOV
+# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure
+# ar/cc/ld-* macros return correct values.
+ifdef CONFIG_LTO_CLANG
+# use GNU gold with LLVMgold for LTO linking, and LD for vmlinux_link
+LDFINAL_vmlinux := $(LD)
+LD := $(LDGOLD)
+LDFLAGS += -plugin LLVMgold.so
+# use llvm-ar for building symbol tables from IR files, and llvm-dis instead
+# of objdump for processing symbol versions and exports
+LLVM_AR := llvm-ar
+LLVM_DIS := llvm-dis
+export LLVM_AR LLVM_DIS
+endif
+
# The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
# values of the respective KBUILD_* variables
ARCH_CPPFLAGS :=
KBUILD_CFLAGS += $(call cc-option,-fdata-sections,)
endif
+ifdef CONFIG_LTO_CLANG
+lto-clang-flags := -flto -fvisibility=hidden
+
+# allow disabling only clang LTO where needed
+DISABLE_LTO_CLANG := -fno-lto -fvisibility=default
+export DISABLE_LTO_CLANG
+endif
+
+ifdef CONFIG_LTO
+lto-flags := $(lto-clang-flags)
+KBUILD_CFLAGS += $(lto-flags)
+
+DISABLE_LTO := $(DISABLE_LTO_CLANG)
+export DISABLE_LTO
+
+# LDFINAL_vmlinux and LDFLAGS_FINAL_vmlinux can be set to override
+# the linker and flags for vmlinux_link.
+export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
CHECKFLAGS += $(NOSTDINC_FLAGS)
# CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!")
PHONY += prepare-compiler-check
prepare-compiler-check: FORCE
+# Make sure we're using a supported toolchain with LTO_CLANG
+ifdef CONFIG_LTO_CLANG
+ ifneq ($(call clang-ifversion, -ge, 0500, y), y)
+ @echo Cannot use CONFIG_LTO_CLANG: requires clang 5.0 or later >&2 && exit 1
+ endif
+ ifneq ($(call gold-ifversion, -ge, 112000000, y), y)
+ @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1
+ endif
+endif
+# Make sure compiler supports LTO flags
+ifdef lto-flags
+ ifeq ($(call cc-option, $(lto-flags)),)
+ @echo Cannot use CONFIG_LTO: $(lto-flags) not supported by compiler \
+ >&2 && exit 1
+ endif
+endif
# Make sure compiler supports requested stack protector flag.
ifdef stackp-name
ifeq ($(call cc-option, $(stackp-flag)),)
-o -name modules.builtin -o -name '.tmp_*.o.*' \
-o -name '*.c.[012]*.*' \
-o -name '*.ll' \
- -o -name '*.gcno' \) -type f -print | xargs rm -f
+ -o -name '*.gcno' \
+ -o -name '*.*.symversions' \) -type f -print | xargs rm -f
# Generate tags for editors
# ---------------------------------------------------------------------------
sections (e.g., '.text.init'). Typically '.' in section names
is used to distinguish them from label names / C identifiers.
+config LTO
+ def_bool n
+
+config ARCH_SUPPORTS_LTO_CLANG
+ bool
+ help
+ An architecture should select this option it supports:
+ - compiling with clang,
+ - compiling inline assembly with clang's integrated assembler,
+ - and linking with either lld or GNU gold w/ LLVMgold.
+
+choice
+ prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
+ default LTO_NONE
+ help
+ This option turns on Link-Time Optimization (LTO).
+
+config LTO_NONE
+ bool "None"
+
+config LTO_CLANG
+ bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)"
+ depends on ARCH_SUPPORTS_LTO_CLANG
+ depends on !FTRACE_MCOUNT_RECORD
+ select LTO
+ select THIN_ARCHIVES
+ select LD_DEAD_CODE_DATA_ELIMINATION
+ help
+ This option enables clang's Link Time Optimization (LTO), which allows
+ the compiler to optimize the kernel globally at link time. If you
+ enable this option, the compiler generates LLVM IR instead of object
+ files, and the actual compilation from IR occurs at the LTO link step,
+ which may take several minutes.
+
+ If you select this option, you must compile the kernel with clang >=
+ 5.0 (make CC=clang) and GNU gold from binutils >= 2.27, and have the
+ LLVMgold plug-in in LD_LIBRARY_PATH.
+
+endchoice
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
+ifdef CONFIG_LTO_CLANG
+# Generate .o.symversions files for each .o with exported symbols, and link these
+# to the kernel and/or modules at the end.
+cmd_modversions_c = \
+ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then \
+ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
+ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+ > $(@D)/$(@F).symversions; \
+ fi; \
+ else \
+ if $(LLVM_DIS) -o=- $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
+ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+ > $(@D)/$(@F).symversions; \
+ fi; \
+ fi; \
+ mv -f $(@D)/.tmp_$(@F) $@;
+else
cmd_modversions_c = \
if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
mv -f $(@D)/.tmp_$(@F) $@; \
fi;
endif
+endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
ifdef BUILD_C_RECORDMCOUNT
#
ifdef builtin-target
+ifdef CONFIG_LTO_CLANG
+ ifdef CONFIG_MODVERSIONS
+ # combine symversions for later processing
+ update_lto_symversions = \
+ rm -f $@.symversions; \
+ for i in $(filter-out FORCE,$^); do \
+ if [ -f $$i.symversions ]; then \
+ cat $$i.symversions \
+ >> $@.symversions; \
+ fi; \
+ done;
+ endif
+ # rebuild the symbol table with llvm-ar to include IR files
+ update_lto_symtable = ; \
+ mv -f $@ $@.tmp; \
+ $(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@ \
+ $$($(AR) t $@.tmp); \
+ rm -f $@.tmp
+endif
+
ifdef CONFIG_THIN_ARCHIVES
- cmd_make_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
+ cmd_make_builtin = $(update_lto_symversions) \
+ rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
cmd_make_empty_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
quiet_cmd_link_o_target = AR $@
else
quiet_cmd_link_l_target = AR $@
ifdef CONFIG_THIN_ARCHIVES
- cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)
+ cmd_link_l_target = \
+ $(update_lto_symversions) \
+ rm -f $@; \
+ $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) \
+ $(update_lto_symtable)
else
cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y)
endif
ref_prefix = EXTERN(
endif
-quiet_cmd_export_list = EXPORTS $@
-cmd_export_list = $(OBJDUMP) -h $< | \
- sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/$(ref_prefix)\1)/p' >$(ksyms-lds);\
- rm -f $(dummy-object);\
+filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ "]*\).*/$(ref_prefix)\1)/p'
+link_export_list = rm -f $(dummy-object);\
echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
$(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\
rm $(dummy-object) $(ksyms-lds)
+quiet_cmd_export_list = EXPORTS $@
+
+ifdef CONFIG_LTO_CLANG
+# objdump doesn't understand IR files and llvm-dis doesn't support archives,
+# so we'll walk through each file in the archive separately
+cmd_export_list = \
+ rm -f $(ksyms-lds); \
+ for o in $$($(AR) t $<); do \
+ if $(OBJDUMP) -h $$o >/dev/null 2>/dev/null; then \
+ $(OBJDUMP) -h $$o | \
+ $(filter_export_list) \
+ >>$(ksyms-lds); \
+ else \
+ $(LLVM_DIS) -o=- $$o | \
+ $(filter_export_list) \
+ >>$(ksyms-lds); \
+ fi; \
+ done; \
+ $(link_export_list)
+else
+cmd_export_list = $(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds); \
+ $(link_export_list)
+endif
+
$(obj)/lib-ksyms.o: $(lib-target) FORCE
$(call if_changed,export_list)
ifdef CONFIG_THIN_ARCHIVES
quiet_cmd_link_multi-y = AR $@
- cmd_link_multi-y = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps)
+ cmd_link_multi-y = $(update_lto_symversions) \
+ rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \
+ $(update_lto_symtable)
else
quiet_cmd_link_multi-y = LD $@
cmd_link_multi-y = $(cmd_link_multi-link)
endif
quiet_cmd_link_multi-m = LD [M] $@
-cmd_link_multi-m = $(cmd_link_multi-link)
+
+ifdef CONFIG_LTO_CLANG
+ # don't compile IR until needed
+ cmd_link_multi-m = $(cmd_link_multi-y)
+else
+ cmd_link_multi-m = $(cmd_link_multi-link)
+endif
$(multi-used-y): FORCE
$(call if_changed,link_multi-y)
-$(call multi_depend, $(multi-used-y), .o, -objs -y)
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)
@{ echo $(@:.o=.ko); echo $(link_multi_deps); \
$(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
+
+$(call multi_depend, $(multi-used-y), .o, -objs -y)
$(call multi_depend, $(multi-used-m), .o, -objs -y -m)
targets += $(multi-used-y) $(multi-used-m)
MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
+# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we
+# need to link them into actual objects before passing them to modpost
+modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)
+
+ifdef CONFIG_LTO_CLANG
+quiet_cmd_cc_lto_link_modules = LD [M] $@
+cmd_cc_lto_link_modules = \
+ $(LD) $(ld_flags) -r -o $(@) \
+ $(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \
+ echo -T $(@:$(modpost-ext).o=.o.symversions)) \
+ --whole-archive $(filter-out FORCE,$^)
+
+$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE
+ $(call if_changed,cc_lto_link_modules)
+endif
+
# We can go over command line length here, so be careful.
quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
- cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T -
+ cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T -
PHONY += __modpost
-__modpost: $(modules:.ko=.o) FORCE
+__modpost: $(modules:.ko=$(modpost-ext).o) FORCE
$(call cmd,modpost) $(wildcard vmlinux)
quiet_cmd_kernel-mod = MODPOST $@
$(call cmd,kernel-mod)
# Declare generated files as targets for modpost
-$(modules:.ko=.mod.c): __modpost ;
-
+$(modules:.ko=$(modpost-ext).mod.c): __modpost ;
# Step 5), compile all *.mod.c files
cmd_cc_o_c = $(CC) $(c_flags) $(KBUILD_CFLAGS_MODULE) $(CFLAGS_MODULE) \
-c -o $@ $<
-$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
+$(modules:.ko=.mod.o): %.mod.o: %$(modpost-ext).mod.c FORCE
$(call if_changed_dep,cc_o_c)
-targets += $(modules:.ko=.mod.o)
+targets += $(modules:.ko=$(modpost-ext).mod.o)
ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
# Step 6), final link of the modules with optional arch pass after final link
quiet_cmd_ld_ko_o = LD [M] $@
+
+ifdef CONFIG_LTO_CLANG
+ cmd_ld_ko_o = \
+ $(LD) -r $(LDFLAGS) \
+ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
+ $(shell [ -s $(@:.ko=.o.symversions) ] && \
+ echo -T $(@:.ko=.o.symversions)) \
+ -o $@ --whole-archive \
+ $(filter-out FORCE,$(^:$(modpost-ext).o=.o))
+else
cmd_ld_ko_o = \
$(LD) -r $(LDFLAGS) \
$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
-o $@ $(filter-out FORCE,$^) ; \
$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+endif
-$(modules): %.ko :%.o %.mod.o FORCE
+$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE
+$(call if_changed,ld_ko_o)
targets += $(modules)
${AR} rcsTP${KBUILD_ARFLAGS} built-in.o \
${KBUILD_VMLINUX_INIT} \
${KBUILD_VMLINUX_MAIN}
+
+ if [ -n "${CONFIG_LTO_CLANG}" ]; then
+ mv -f built-in.o built-in.o.tmp
+ ${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.o $(${AR} t built-in.o.tmp)
+ rm -f built-in.o.tmp
+ fi
+ fi
+}
+
+# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into
+# .tmp_symversions
+modversions()
+{
+ if [ -z "${CONFIG_LTO_CLANG}" ]; then
+ return
fi
+
+ if [ -z "${CONFIG_MODVERSIONS}" ]; then
+ return
+ fi
+
+ rm -f .tmp_symversions
+
+ for a in built-in.o ${KBUILD_VMLINUX_LIBS}; do
+ for o in $(${AR} t $a); do
+ if [ -f ${o}.symversions ]; then
+ cat ${o}.symversions >> .tmp_symversions
+ fi
+ done
+ done
+
+ echo "-T .tmp_symversions"
}
# Link of vmlinux.o used for section mismatch analysis
${KBUILD_VMLINUX_LIBS} \
--end-group"
fi
- ${LD} ${LDFLAGS} -r -o ${1} ${objects}
+
+ if [ -n "${CONFIG_LTO_CLANG}" ]; then
+ # This might take a while, so indicate that we're doing
+ # an LTO link
+ info LTO vmlinux.o
+ else
+ info LD vmlinux.o
+ fi
+
+ ${LD} ${LDFLAGS} -r -o ${1} $(modversions) ${objects}
}
# Link of vmlinux
local objects
if [ "${SRCARCH}" != "um" ]; then
- if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
- objects="--whole-archive \
+ local ld=${LD}
+ local ldflags="${LDFLAGS} ${LDFLAGS_vmlinux}"
+
+ if [ -n "${LDFINAL_vmlinux}" ]; then
+ ld=${LDFINAL_vmlinux}
+ ldflags="${LDFLAGS_FINAL_vmlinux} ${LDFLAGS_vmlinux}"
+ fi
+
+ if [[ -n "${CONFIG_THIN_ARCHIVES}" && -z "${CONFIG_LTO_CLANG}" ]]; then
+ objects="--whole-archive \
built-in.o \
--no-whole-archive \
--start-group \
${1}"
fi
- ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
- -T ${lds} ${objects}
+ ${ld} ${ldflags} -o ${2} -T ${lds} ${objects}
else
if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
objects="-Wl,--whole-archive \
fi
}
-
# Create ${2} .o file with all symbols from the ${1} object file
kallsyms()
{
rm -f .tmp_System.map
rm -f .tmp_kallsyms*
rm -f .tmp_version
+ rm -f .tmp_symversions
rm -f .tmp_vmlinux*
rm -f built-in.o
rm -f System.map
archive_builtin
#link vmlinux.o
-info LD vmlinux.o
modpost_link vmlinux.o
# modpost vmlinux.o to check for section mismatches
${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
+if [ -n "${CONFIG_LTO_CLANG}" ]; then
+ # Re-use vmlinux.o, so we can avoid the slow LTO link step in
+ # vmlinux_link
+ KBUILD_VMLINUX_INIT=
+ KBUILD_VMLINUX_MAIN=vmlinux.o
+ KBUILD_VMLINUX_LIBS=
+fi
+
kallsymso=""
kallsyms_vmlinux=""
if [ -n "${CONFIG_KALLSYMS}" ]; then