Created
October 26, 2025 11:29
-
-
Save BarishNamazov/61b5829c1ec53eda35084ba9604b774a to your computer and use it in GitHub Desktop.
Analyzes which of the extra optimization flags that GCC enables at -O3 (compared to -O2) affect code generation or performance, by comparing the generated assembly and benchmarking the resulting binaries.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import subprocess | |
| import sys | |
| import os | |
| import filecmp | |
def compile_to_asm(source_file, flags, output_path):
    """Translate *source_file* to assembly with g++ and store it at *output_path*.

    Args:
        source_file: Path of the source file handed to g++.
        flags: List of extra g++ arguments, placed before ``-S``.
        output_path: Destination path of the generated assembly listing.

    Raises:
        subprocess.CalledProcessError: If g++ exits with a non-zero status.
    """
    command = ["g++", *flags, "-S", source_file, "-o", output_path]
    # Both output streams are discarded, so compiler diagnostics are not shown.
    subprocess.run(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )
def compile_binary(source_file, flags, output_path):
    """Build an executable from *source_file* with g++ and write it to *output_path*.

    Args:
        source_file: Path of the source file handed to g++.
        flags: List of extra g++ arguments, placed before the source file.
        output_path: Destination path of the produced binary.

    Raises:
        subprocess.CalledProcessError: If g++ exits with a non-zero status.
    """
    command = ["g++"]
    command.extend(flags)
    command += [source_file, "-o", output_path]
    # Unlike the assembly step, compiler output is left attached to the terminal.
    subprocess.run(command, check=True)
def _enabled_optimizer_flags(level):
    """Return the set of optimizer flags gcc reports as ``[enabled]`` at *level*.

    Runs ``gcc -Q --help=optimizers <level>`` and collects the first
    whitespace-separated field of every output line marked ``[enabled]``.
    An empty set is returned when gcc cannot be started.
    """
    try:
        result = subprocess.run(
            ["gcc", "-Q", "--help=optimizers", level],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
            check=False,
        )
    except OSError:
        # The shell pipeline this replaces produced no output when gcc was
        # unavailable; keep that best-effort behaviour instead of crashing.
        return set()

    flags = set()
    for line in result.stdout.splitlines():
        fields = line.split()
        if fields and "[enabled]" in line:
            flags.add(fields[0])
    return flags


def get_o3_extra_flags():
    """Return the optimizer flags that gcc enables at -O3 but not at -O2.

    Returns:
        A sorted list of flag strings (for example ``-fpeel-loops``). The list
        is empty when gcc is unavailable or reports no difference.
    """
    # An argument list with subprocess.run avoids the shell, the external
    # grep, and the unclosed pipe objects left behind by os.popen().
    enabled_o2 = _enabled_optimizer_flags("-O2")
    enabled_o3 = _enabled_optimizer_flags("-O3")
    return sorted(enabled_o3 - enabled_o2)
def _flag_to_name(flag):
    """Convert a GCC flag such as ``-fpeel-loops`` into a file-name fragment."""
    # Strip only the leading "-f". Replacing every "-f" occurrence would also
    # eat the one inside flags like "-fversion-loops-for-strides".
    stem = flag[2:] if flag.startswith("-f") else flag
    return stem.replace("-", "_")


def main():
    """Find which extra -O3 flags change the code generated for one source file.

    Expects exactly one command-line argument: the path of the source file.
    The steps are:

    1. Determine the flags enabled at -O3 but not at -O2.
    2. Compile the source to assembly at -O2, at -O3, and at -O2 plus each
       extra flag, then sort each flag by whether its assembly is identical
       to the -O2 baseline, identical to the -O3 baseline, or neither.
    3. For flags matching neither baseline, build binaries and benchmark them
       against the plain -O2 and -O3 binaries with ``hyperfine``.

    All artifacts are written to the ``build`` directory. Exits with status 1
    on a usage error, a missing source file, or a missing ``hyperfine``
    executable, and with status 0 when no flag needs benchmarking.
    """
    if len(sys.argv) != 2:
        print("Usage: ./find_culprit.py <source.cpp>")
        sys.exit(1)

    source_file = sys.argv[1]
    if not os.path.exists(source_file):
        print(f"Error: {source_file} not found")
        sys.exit(1)

    base_name = os.path.splitext(os.path.basename(source_file))[0]
    print(f"Analyzing optimization flags for: {source_file}\n")

    o3_extra_flags = get_o3_extra_flags()
    print(f"Found {len(o3_extra_flags)} additional flags in O3 compared to O2")
    print(f"Flags: {', '.join(o3_extra_flags)}\n")

    os.makedirs("build", exist_ok=True)

    print("Generating baseline assemblies...")
    compile_to_asm(source_file, ["-O2"], "build/baseline_o2.s")
    compile_to_asm(source_file, ["-O3"], "build/baseline_o3.s")

    print("Analyzing flags by assembly comparison...")
    matches_o2 = []
    matches_o3 = []
    to_benchmark = []
    for flag in o3_extra_flags:
        asm_path = f"build/test_{_flag_to_name(flag)}.s"
        compile_to_asm(source_file, ["-O2", flag], asm_path)
        # shallow=False forces a byte-by-byte comparison of the listings.
        if filecmp.cmp(asm_path, "build/baseline_o2.s", shallow=False):
            matches_o2.append(flag)
        elif filecmp.cmp(asm_path, "build/baseline_o3.s", shallow=False):
            matches_o3.append(flag)
        else:
            to_benchmark.append(flag)

    print("\nResults:")
    print(f" Same assembly with O2: {', '.join(matches_o2) if matches_o2 else 'None'}")
    print(f" Same assembly with O3: {', '.join(matches_o3) if matches_o3 else 'None'}")
    if to_benchmark:
        print(f" Different assembly: {', '.join(to_benchmark)}")
    else:
        print("\nNo flags need benchmarking.")
        sys.exit(0)

    # Compute each per-flag binary path once so the compile step and the
    # benchmark command cannot disagree on the file name.
    flag_binaries = [
        (flag, f"build/{base_name}_o2_{_flag_to_name(flag)}") for flag in to_benchmark
    ]

    print(f"\nCompiling {len(to_benchmark) + 2} binaries for benchmarking...")
    compile_binary(source_file, ["-O2"], f"build/{base_name}_o2")
    compile_binary(source_file, ["-O3"], f"build/{base_name}_o3")
    for flag, binary_path in flag_binaries:
        compile_binary(source_file, ["-O2", flag], binary_path)

    print("\nRunning benchmarks...\n")
    hyperfine_cmd = [
        "hyperfine",
        "--warmup",
        "3",
        "--min-runs",
        "10",
        "--command-name",
        "O2",
        f"./build/{base_name}_o2",
        "--command-name",
        "O3",
        f"./build/{base_name}_o3",
    ]
    for flag, binary_path in flag_binaries:
        hyperfine_cmd.extend(["--command-name", f"O2+{flag}", f"./{binary_path}"])

    try:
        subprocess.run(hyperfine_cmd, check=True)
    except FileNotFoundError:
        # Raised when the hyperfine executable itself cannot be found.
        print("Error: hyperfine not found; install it to run the benchmarks")
        sys.exit(1)
# Script entry point: run the analysis only when executed directly, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment