From dd66e38c35a96c60005769e8e25f878b56673f31 Mon Sep 17 00:00:00 2001 From: Gregory Szorc Date: Mon, 16 Jan 2023 09:50:57 -0800 Subject: [PATCH 1/2] gh-104523: inline minimal PGO rules Various rules were only ever invoked once and had minimal bodies. I don't see a benefit to the indirection. So this commit inlines rules to simplify the PGO logic. skip news (trivial, non-user-visible change) --- Makefile.pre.in | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 7c44b7be5dbe67..2f9f4a1c5870ca 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -642,7 +642,7 @@ profile-gen-stamp: profile-clean-stamp exit 1;\ fi @echo "Building with support for profile generation:" - $(MAKE) build_all_generate_profile + $(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)" touch $@ # Run task with profile generation build to create profile information. @@ -652,8 +652,9 @@ profile-run-stamp: # enabled. $(MAKE) profile-gen-stamp # Next, run the profile task to generate the profile information. - $(MAKE) run_profile_task - $(MAKE) build_all_merge_profile + @ # FIXME: can't run for a cross build + $(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true + $(LLVM_PROF_MERGER) # Remove profile generation binary since we are done with it. $(MAKE) clean-retain-profile # This is an expensive target to build and it does not have proper @@ -661,19 +662,6 @@ profile-run-stamp: # to record its completion and avoid re-running it. touch $@ -.PHONY: build_all_generate_profile -build_all_generate_profile: - $(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)" - -.PHONY: run_profile_task -run_profile_task: - @ # FIXME: can't run for a cross build - $(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true - -.PHONY: build_all_merge_profile -build_all_merge_profile: - $(LLVM_PROF_MERGER) - # Compile Python binary with profile guided optimization. # To force re-running of the profile task, remove the profile-run-stamp file. .PHONY: profile-opt From 5e9c7159e7645d95cae46bc49746b0695a947a9f Mon Sep 17 00:00:00 2001 From: Gregory Szorc Date: Mon, 16 Jan 2023 10:20:49 -0800 Subject: [PATCH 2/2] gh-104523 overhaul build rules for optimized binaries This commit overhauls the make-based build system's rules for building optimized binaries. Along the way it fixes a myriad of bugs and shortcomings with the prior approach. The old way of producing optimized binaries had various limitations: * `make [all]` would do work when PGO was enabled because the phony `profile-opt` rule was non-empty. This prevented no-op PGO builds from working at all. This meant workflows like `make; make install` either incurred extra work or failed due to race conditions. * Same thing for BOLT, as its `bolt-opt` rule was also non-empty and always ran during `make [all]`. * BOLT could not be run multiple times without a full rebuild because `llvm-bolt` can't instrument binaries that have already received BOLT optimizations. * It was difficult to run BOLT on its own because of how various make targets and their dependencies were structured. * I found the old way that configure and make communicated the default targets to be confusing and hard to understand. There are essentially 2 major changes going on in this commit: 1. A rework of the high-level make targets for performing a build and how they are defined. 2. A rework of all the make logic related to profile-based optimization (read: PGO and BOLT). Build Target Rework =================== Before, we essentially had `build_all`, `profile-opt`, `bolt-opt` and `build_wasm` as our 3 targets for performing a build. `all` would alias to one of these, as appropriate. And there was another definition for which _simple_ make target to evaluate for non-optimized builds. This was likely `build_all` or `all`. In the rework, we introduce 2 new high-level targets: * `build-plain` - Perform a build without optimizations. * `build-optimized` - Perform a build with optimizations. `build-plain` is aliased to `build_all` in all configurations except WASM, where it is `build_wasm`. `build-optimized` by default is aliased to a target that prints an error message when optimizations aren't enabled. If PGO or BOLT are enabled, it is aliased to their respective target. `build-optimized` is the logical successor to `profile-opt`. I felt it best to delete `profile-opt` completely, as the new `build-*` high-level targets feel more friendly to use. But if people lament its loss, we can add a `profile-opt: build-optimized` to achieve almost the same result. Profiled-Based Optimization Rework ================================== Most of the make logic related to profile-based optimization (read: PGO and BOLT) has been touched in this change. A major issue with the old way of doing things was we used phony, always-executed make rules. This is a bad practice in make because it undermines no-op builds. Another issue is that the separation between the rules and what order they ran in wasn't always clear. Both PGO and BOLT consist of the same 4 phase solution: instrument, run, analyze, and apply. However, these steps weren't clearly expressed in the make logic. This is especially true for BOLT, which only had 1 make rule. Another issue with BOLT is that it was really easy to get things into a bad state. e.g. if you applied BOLT to `pythonX.Y` you could not run BOLT again unless you rebuilt `pythonX.Y` from source. In the new world, we have separate `profile---stamp` rules defining the 4 distinct `instrument`, `run`, `analyze`, and `apply` stages for both PGO and BOLT. Each of these stages is tracked by a _stamp_ semaphore file so progress can be captured. This should all be pretty straightforward. There is some minimal complexity here to handle BOLT's optional dependency on PGO, as BOLT either depends on `build_all` or `profile-pgo-apply-stamp`. As part of the refactor to BOLT we also preserve the original input binary before BOLT is applied. This original file is restored if BOLT runs again. This greatly simplifies repeated BOLT invocations, as make doesn't perform needless work. However, this is all best effort, as it is possible for some make target evaluations to still get things in a bad state. Other Remarks ============= This change effectively reverts gh-103574. The readelf based mechanism inserted by that change was effectively working around shortcomings in the make DAG. This change addresses those shortcomings so the readelf integration is no longer required. If this change perturbs any bugs, they are likely around cleaning behavior. The cleaning rules are a bit complicated and not clearly documented. And I'm unsure which targets CPython developers often iterate on. It is highly possible that state cleanup of PGO and/or BOLT files isn't as robust as it needs to be. I explicitly deleted some calls to PGO cleanup because those calls prevented no-op `make [all]` from working. It is certainly possible something somewhere (release automation?) relied on these files being deleted when they no longer are. We still have targets to purge profile files and it should be trivial to add these to appropriate make rules. What I'm trying to say is there are likely subtle workflow regressions with this refactor. But in my mind it is 3 steps forward 1 step back. It should be pretty straightforward to fix any regressions once people complain. --- .gitignore | 3 +- Makefile.pre.in | 179 ++++++++++++++++++++++++++++++++++-------------- configure | 160 ++++++++++--------------------------------- configure.ac | 59 +++++++++------- 4 files changed, 197 insertions(+), 204 deletions(-) diff --git a/.gitignore b/.gitignore index d9c4a7972f076d..547e6746e1d91b 100644 --- a/.gitignore +++ b/.gitignore @@ -122,8 +122,7 @@ Tools/unicode/data/ # hendrikmuhs/ccache-action@v1 /.ccache /platform -/profile-clean-stamp -/profile-run-stamp +/profile-*-stamp /Python/deepfreeze/*.c /pybuilddir.txt /pyconfig.h diff --git a/Makefile.pre.in b/Makefile.pre.in index 2f9f4a1c5870ca..b22f70f661e74b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -38,7 +38,6 @@ CC= @CC@ CXX= @CXX@ LINKCC= @LINKCC@ AR= @AR@ -READELF= @READELF@ SOABI= @SOABI@ LDVERSION= @LDVERSION@ LIBPYTHON= @LIBPYTHON@ @@ -601,13 +600,27 @@ LIBHACL_SHA2_HEADERS= \ ######################################################################### # Rules -# Default target -all: @DEF_MAKE_ALL_RULE@ +# Default target. +# Likely either `build-plain` or `build-optimized`. +all: @MAKE_TARGET_ALL@ # First target in Makefile is implicit default. So .PHONY needs to come after # all. .PHONY: all +# Build without any optimizations or instrumented binaries. +.PHONY: build-plain +build-plain: @MAKE_TARGET_BUILD_PLAIN@ + +# Build with optimizations (PGO, BOLT, etc). +.PHONY: build-optimized +build-optimized: @MAKE_TARGET_BUILD_OPTIMIZED@ + +.PHONY: build-optimized-not-enabled +build-optimized-not-enabled: + @echo "build-optimized requires --enable-optimizations in configure; aborting" + @exit 1 + .PHONY: build_all build_all: check-clean-src $(BUILDPYTHON) platform sharedmods \ gdbhooks Programs/_testembed scripts checksharedmods rundsymutil @@ -629,69 +642,145 @@ check-clean-src: exit 1; \ fi -# Profile generation build must start from a clean tree. +# Profile-based optimization. +# +# PGO and BOLT profile-based optimization is supported. For each optimization, +# roughly the following steps are done: +# +# 1. "Instrument" binaries with run-time data collection (e.g. build or modify +# a variant of the binary.) +# 2. "Run" instrumented binaries (via subset of test suite) to collect data. +# 3. "Analyze" / collect / merge data files from previous step. +# 4. "Apply" collected data from above. (e.g. rebuild or modify a binary). +# +# 0, 1, or multiple profile based optimizations can be enabled. +# +# We track the progress of profile-based optimization using various "stamp" +# files. An empty stamp file tracks the stage of optimization we're in. +# Each *-stamp rule that follows is defined in execution / dependency order. + +# Remove files produced by or used for tracking profile-guided optimization. +.PHONY: profile-remove +profile-remove: clean-bolt + find . -name '*.gc??' -exec rm -f {} ';' + find . -name '*.profclang?' -exec rm -f {} ';' + find . -name '*.dyn' -exec rm -f {} ';' + rm -f $(COVERAGE_INFO) + rm -rf $(COVERAGE_REPORT) + # Remove all progress tracking stamps to ensure a clean slate. + rm -f profile-*-stamp + +# Profile-based optimization requires a fresh build environment. profile-clean-stamp: - $(MAKE) clean + $(MAKE) clean profile-remove touch $@ -# Compile with profile generation enabled. -profile-gen-stamp: profile-clean-stamp +# Build with PGO instrumentation enabled. +profile-pgo-instrument-stamp: profile-clean-stamp @if [ $(LLVM_PROF_ERR) = yes ]; then \ echo "Error: Cannot perform PGO build because llvm-profdata was not found in PATH" ;\ echo "Please add it to PATH and run ./configure again" ;\ exit 1;\ fi @echo "Building with support for profile generation:" - $(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)" + $(MAKE) @MAKE_TARGET_BUILD_PLAIN@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)" touch $@ -# Run task with profile generation build to create profile information. -profile-run-stamp: +# Run PGO instrumented binaries and collect profile data. +profile-pgo-run-stamp: profile-pgo-instrument-stamp @echo "Running code to generate profile data (this can take a while):" - # First, we need to create a clean build with profile generation - # enabled. - $(MAKE) profile-gen-stamp - # Next, run the profile task to generate the profile information. @ # FIXME: can't run for a cross build $(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true + touch $@ + +# Collect data files produced by running PGO instrumented binaries. +profile-pgo-analyze-stamp: profile-pgo-run-stamp $(LLVM_PROF_MERGER) # Remove profile generation binary since we are done with it. $(MAKE) clean-retain-profile - # This is an expensive target to build and it does not have proper - # makefile dependency information. So, we create a "stamp" file - # to record its completion and avoid re-running it. touch $@ -# Compile Python binary with profile guided optimization. -# To force re-running of the profile task, remove the profile-run-stamp file. -.PHONY: profile-opt -profile-opt: profile-run-stamp +# Use collected PGO data to influence rebuild of binaries. +profile-pgo-apply-stamp: profile-pgo-analyze-stamp @echo "Rebuilding with profile guided optimizations:" - -rm -f profile-clean-stamp - $(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)" - -.PHONY: bolt-opt -bolt-opt: @PREBOLT_RULE@ - rm -f *.fdata - @if $(READELF) -p .note.bolt_info $(BUILDPYTHON) | grep BOLT > /dev/null; then\ - echo "skip: $(BUILDPYTHON) is already BOLTed."; \ - else \ - @LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst; \ - ./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true; \ - @MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata; \ - @LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot; \ - rm -f *.fdata; \ - rm -f $(BUILDPYTHON).bolt_inst; \ - mv $(BUILDPYTHON).bolt $(BUILDPYTHON); \ + # Need to purge PGO instrumented build to force a rebuild. + $(MAKE) clean-retain-profile + $(MAKE) @MAKE_TARGET_BUILD_PLAIN@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)" + touch $@ + +# BOLT supports instrumenting and applying changes to standalone binaries +# without having to recompile. +# +# BOLT can run independently or in addition to PGO. If running with PGO, +# it always runs after PGO. Care needs to be taken to preserve PGO state +# when running BOLT so make doesn't re-apply PGO. +# +# BOLT also can't instrument binaries that have already had BOLT applied +# to them. So we make an attempt to preserve and re-use the pristine +# pre-BOLT binaries so developers can iterate on just BOLT optimization +# passes. + +# List of binaries that BOLT runs on. +BOLT_BINARIES = $(BUILDPYTHON) + +# Remove traces of bolt. +.PHONY: clean-bolt +clean-bolt: + # Instrumented binaries. + find . -name '*.bolt_inst' -exec rm -f {} ';' + # The data files they produce. + find . -name '*.fdata' -exec rm -f {} ';' + # Copied of binaries before BOLT application. + find . -name '*.prebolt' -exec rm -f {} ';' + +# BOLTs dependencies are a bit wonky. +# +# If PGO is enabled, we can take a native rule dependency on a stamp file. +# If PGO isn't enabled, we don't have a stamp to key off of and the phony +# target (e.g. build_all) will always force rebuilds. So we call out to +# make externally to sidestep the dependency. +# +# We can simplify this hack if we ever get stamp files for plain builds. +profile-bolt-prebuild-stamp: @MAKE_BOLT_NATIVE_DEPENDENCY@ + if [ -n "@MAKE_BOLT_MAKE_DEPENDENCY@" ]; then \ + $(MAKE) @MAKE_BOLT_MAKE_DEPENDENCY@; \ fi + touch $@ +profile-bolt-instrument-stamp: profile-bolt-prebuild-stamp + for bin in $(BOLT_BINARIES); do \ + if [ -e "$${bin}.prebolt" ]; then \ + echo "Restoring pre-BOLT binary $${bin}.prebolt"; \ + mv "$${bin}.prebolt" "$${bin}"; \ + fi \ + done + # Ensure prior BOLT state is purged. + $(MAKE) clean-bolt + @LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst + touch $@ + +profile-bolt-run-stamp: profile-bolt-instrument-stamp + ./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true + touch $@ + +profile-bolt-analyze-stamp: profile-bolt-run-stamp + @MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata + touch $@ + +profile-bolt-apply-stamp: profile-bolt-analyze-stamp + @LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot + mv $(BUILDPYTHON) $(BUILDPYTHON).prebolt + mv $(BUILDPYTHON).bolt $(BUILDPYTHON) + touch $@ + +# End of profile-based optimization rules. # Compile and run with gcov .PHONY: coverage coverage: @echo "Building with support for coverage checking:" $(MAKE) clean - $(MAKE) @DEF_MAKE_RULE@ CFLAGS="$(CFLAGS) -O0 -pg --coverage" LDFLAGS="$(LDFLAGS) --coverage" + $(MAKE) @MAKE_SIMPLE_BUILD_TARGET@ CFLAGS="$(CFLAGS) -O0 -pg --coverage" LDFLAGS="$(LDFLAGS) --coverage" .PHONY: coverage-lcov coverage-lcov: @@ -2610,23 +2699,9 @@ clean-retain-profile: pycremoval -rm -f Python/frozen_modules/MANIFEST -find build -type f -a ! -name '*.gc??' -exec rm -f {} ';' -rm -f Include/pydtrace_probes.h - -rm -f profile-gen-stamp - -.PHONY: profile-removal -profile-removal: - find . -name '*.gc??' -exec rm -f {} ';' - find . -name '*.profclang?' -exec rm -f {} ';' - find . -name '*.dyn' -exec rm -f {} ';' - rm -f $(COVERAGE_INFO) - rm -rf $(COVERAGE_REPORT) - rm -f profile-run-stamp .PHONY: clean clean: clean-retain-profile - @if test @DEF_MAKE_ALL_RULE@ = profile-opt; then \ - rm -f profile-gen-stamp profile-clean-stamp; \ - $(MAKE) profile-removal; \ - fi .PHONY: clobber clobber: clean diff --git a/configure b/configure index 7aad4fe89e3cbf..efad1988a2ff8e 100755 --- a/configure +++ b/configure @@ -885,9 +885,8 @@ CFLAGS_ALIASING OPT MERGE_FDATA LLVM_BOLT -ac_ct_READELF -READELF -PREBOLT_RULE +MAKE_BOLT_MAKE_DEPENDENCY +MAKE_BOLT_NATIVE_DEPENDENCY LLVM_PROF_FOUND LLVM_PROFDATA LLVM_PROF_ERR @@ -898,8 +897,9 @@ PGO_PROF_GEN_FLAG LLVM_AR_FOUND LLVM_AR PROFILE_TASK -DEF_MAKE_RULE -DEF_MAKE_ALL_RULE +MAKE_TARGET_BUILD_OPTIMIZED +MAKE_TARGET_BUILD_PLAIN +MAKE_TARGET_ALL ABIFLAGS LN MKDIR_P @@ -7448,7 +7448,17 @@ $as_echo "no" >&6; } fi # Enable optimization flags +# Which target `all` (the default make target) depends on. +# Which target to evaluate for non-optimized builds. + + + +# Do a non-optimized generic build by default. Build configurations below +# can override as appropriate. +MAKE_TARGET_ALL="build-plain" +MAKE_TARGET_BUILD_PLAIN="build_all" +MAKE_TARGET_BUILD_OPTIMIZED="build-optimized-not-enabled" Py_OPT='false' { $as_echo "$as_me:${as_lineno-$LINENO}: checking for --enable-optimizations" >&5 @@ -7473,13 +7483,15 @@ fi if test "$Py_OPT" = 'true' ; then + # PGO is implied by optimizations mode. + PGO_ENABLED=1 + MAKE_TARGET_ALL="build-optimized" + MAKE_TARGET_BUILD_OPTIMIZED="profile-pgo-apply-stamp" + # Intentionally not forcing Py_LTO='true' here. Too many toolchains do not # compile working code using it and both test_distutils and test_gdb are # broken when you do manage to get a toolchain that works with it. People # who want LTO need to use --with-lto themselves. - DEF_MAKE_ALL_RULE="profile-opt" - REQUIRE_PGO="yes" - DEF_MAKE_RULE="build_all" case $CC in *gcc*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-semantic-interposition" >&5 @@ -7523,13 +7535,7 @@ fi ;; esac elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then - DEF_MAKE_ALL_RULE="build_wasm" - REQUIRE_PGO="no" - DEF_MAKE_RULE="all" -else - DEF_MAKE_ALL_RULE="build_all" - REQUIRE_PGO="no" - DEF_MAKE_RULE="all" + MAKE_TARGET_BUILD_PLAIN="build_wasm" fi @@ -8039,7 +8045,8 @@ case $CC in if test $LLVM_PROF_FOUND = not-found then LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" + + if test -n "${ENABLE_PGO}" then as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5 fi @@ -8055,10 +8062,10 @@ case $CC in if test "${LLVM_PROF_FOUND}" = "not-found" then LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" - then - as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5 - fi + if test -n "${ENABLE_PGO}" + then + as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5 + fi fi ;; *) @@ -8101,115 +8108,18 @@ fi -if test "$Py_BOLT" = 'true' ; then - PREBOLT_RULE="${DEF_MAKE_ALL_RULE}" - DEF_MAKE_ALL_RULE="bolt-opt" - DEF_MAKE_RULE="build_all" - - - if test -n "$ac_tool_prefix"; then - for ac_prog in readelf - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_READELF+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$READELF"; then - ac_cv_prog_READELF="$READELF" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_READELF="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -READELF=$ac_cv_prog_READELF -if test -n "$READELF"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $READELF" >&5 -$as_echo "$READELF" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - test -n "$READELF" && break - done -fi -if test -z "$READELF"; then - ac_ct_READELF=$READELF - for ac_prog in readelf -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_READELF+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_READELF"; then - ac_cv_prog_ac_ct_READELF="$ac_ct_READELF" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_READELF="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_READELF=$ac_cv_prog_ac_ct_READELF -if test -n "$ac_ct_READELF"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_READELF" >&5 -$as_echo "$ac_ct_READELF" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$ac_ct_READELF" && break -done - - if test "x$ac_ct_READELF" = x; then - READELF=""notfound"" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - READELF=$ac_ct_READELF - fi -fi +if test "$Py_BOLT" = 'true' ; then + MAKE_TARGET_BUILD_OPTIMIZED="profile-bolt-apply-stamp" - if test "$READELF" == "notfound" + # Hook up make dependencies differently depending on whether PGO is + # enabled. See inline comment in Makefile.pre.in for how this works. + if test -n "${PGO_ENABLED}" then - as_fn_error $? "readelf is required for a --enable-bolt build but could not be found." "$LINENO" 5 + MAKE_BOLT_NATIVE_DEPENDENCY="profile-pgo-apply-stamp" + else + MAKE_BOLT_MAKE_DEPENDENCY="${MAKE_TARGET_BUILD_PLAIN}" fi # -fno-reorder-blocks-and-partition is required for bolt to work. diff --git a/configure.ac b/configure.ac index 115998e0753b26..073f6070876225 100644 --- a/configure.ac +++ b/configure.ac @@ -1722,8 +1722,18 @@ else fi # Enable optimization flags -AC_SUBST(DEF_MAKE_ALL_RULE) -AC_SUBST(DEF_MAKE_RULE) +# Which target `all` (the default make target) depends on. +AC_SUBST(MAKE_TARGET_ALL) +# Which target to evaluate for non-optimized builds. +AC_SUBST(MAKE_TARGET_BUILD_PLAIN) +AC_SUBST(MAKE_TARGET_BUILD_OPTIMIZED) + +# Do a non-optimized generic build by default. Build configurations below +# can override as appropriate. +MAKE_TARGET_ALL="build-plain" +MAKE_TARGET_BUILD_PLAIN="build_all" +MAKE_TARGET_BUILD_OPTIMIZED="build-optimized-not-enabled" + Py_OPT='false' AC_MSG_CHECKING(for --enable-optimizations) AC_ARG_ENABLE(optimizations, AS_HELP_STRING( @@ -1741,13 +1751,15 @@ fi], [AC_MSG_RESULT(no)]) if test "$Py_OPT" = 'true' ; then + # PGO is implied by optimizations mode. + PGO_ENABLED=1 + MAKE_TARGET_ALL="build-optimized" + MAKE_TARGET_BUILD_OPTIMIZED="profile-pgo-apply-stamp" + # Intentionally not forcing Py_LTO='true' here. Too many toolchains do not # compile working code using it and both test_distutils and test_gdb are # broken when you do manage to get a toolchain that works with it. People # who want LTO need to use --with-lto themselves. - DEF_MAKE_ALL_RULE="profile-opt" - REQUIRE_PGO="yes" - DEF_MAKE_RULE="build_all" case $CC in *gcc*) AX_CHECK_COMPILE_FLAG([-fno-semantic-interposition],[ @@ -1759,13 +1771,7 @@ if test "$Py_OPT" = 'true' ; then elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then dnl Emscripten does not support shared extensions yet. Build dnl "python.[js,wasm]", "pybuilddir.txt", and "platform" files. - DEF_MAKE_ALL_RULE="build_wasm" - REQUIRE_PGO="no" - DEF_MAKE_RULE="all" -else - DEF_MAKE_ALL_RULE="build_all" - REQUIRE_PGO="no" - DEF_MAKE_RULE="all" + MAKE_TARGET_BUILD_PLAIN="build_wasm" fi AC_ARG_VAR(PROFILE_TASK, Python args for PGO generation task) @@ -1967,7 +1973,8 @@ case $CC in if test $LLVM_PROF_FOUND = not-found then LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" + + if test -n "${ENABLE_PGO}" then AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.]) fi @@ -1983,10 +1990,10 @@ case $CC in if test "${LLVM_PROF_FOUND}" = "not-found" then LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" - then - AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.]) - fi + if test -n "${ENABLE_PGO}" + then + AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.]) + fi fi ;; *) @@ -2022,17 +2029,19 @@ else fi], [AC_MSG_RESULT(no)]) -AC_SUBST(PREBOLT_RULE) +AC_SUBST(MAKE_BOLT_NATIVE_DEPENDENCY) +AC_SUBST(MAKE_BOLT_MAKE_DEPENDENCY) + if test "$Py_BOLT" = 'true' ; then - PREBOLT_RULE="${DEF_MAKE_ALL_RULE}" - DEF_MAKE_ALL_RULE="bolt-opt" - DEF_MAKE_RULE="build_all" + MAKE_TARGET_BUILD_OPTIMIZED="profile-bolt-apply-stamp" - AC_SUBST(READELF) - AC_CHECK_TOOLS(READELF, [readelf], "notfound") - if test "$READELF" == "notfound" + # Hook up make dependencies differently depending on whether PGO is + # enabled. See inline comment in Makefile.pre.in for how this works. + if test -n "${PGO_ENABLED}" then - AC_MSG_ERROR([readelf is required for a --enable-bolt build but could not be found.]) + MAKE_BOLT_NATIVE_DEPENDENCY="profile-pgo-apply-stamp" + else + MAKE_BOLT_MAKE_DEPENDENCY="${MAKE_TARGET_BUILD_PLAIN}" fi # -fno-reorder-blocks-and-partition is required for bolt to work.