Skip to content

Instantly share code, notes, and snippets.

@BarishNamazov
Created October 26, 2025 11:29
Show Gist options
  • Select an option

  • Save BarishNamazov/61b5829c1ec53eda35084ba9604b774a to your computer and use it in GitHub Desktop.

Select an option

Save BarishNamazov/61b5829c1ec53eda35084ba9604b774a to your computer and use it in GitHub Desktop.
Analyzes which extra GCC -O3 optimization flags affect code generation or performance compared to -O2, by comparing assemblies and benchmarking results.
#!/usr/bin/env python3
import subprocess
import sys
import os
import filecmp
def compile_to_asm(source_file, flags, output_path):
    """Compile ``source_file`` to an assembly listing using ``g++ -S``.

    Args:
        source_file: Path of the C++ source file to compile.
        flags: List of extra command-line flags passed to ``g++``.
        output_path: Path the generated assembly is written to.

    Raises:
        subprocess.CalledProcessError: If ``g++`` exits with a non-zero status.
    """
    result = subprocess.run(
        ["g++"] + flags + ["-S", source_file, "-o", output_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        # Compiler chatter stays hidden for successful builds, but when the
        # compile fails the diagnostics are the only clue to what went wrong,
        # so surface them before raising.
        sys.stderr.write(result.stderr.decode(errors="replace"))
        result.check_returncode()
def compile_binary(source_file, flags, output_path):
    """Build an executable from ``source_file`` with ``g++``.

    Args:
        source_file: Path of the C++ source file to compile.
        flags: List of extra command-line flags passed to ``g++``.
        output_path: Path of the executable to produce.

    Raises:
        subprocess.CalledProcessError: If ``g++`` exits with a non-zero status.
    """
    command = ["g++"] + flags
    command.extend([source_file, "-o", output_path])
    subprocess.run(command, check=True)
def _enabled_optimizer_flags(opt_level):
    """Return the set of optimizer flags ``gcc`` reports as enabled.

    Runs ``gcc -Q --help=optimizers <opt_level>`` and collects the first
    token of every output line that contains ``enabled``.

    Args:
        opt_level: Optimization level switch, e.g. ``"-O2"``.

    Returns:
        A set of flag names; empty when ``gcc`` cannot be started or prints
        no matching lines.
    """
    try:
        result = subprocess.run(
            ["gcc", "-Q", "--help=optimizers", opt_level],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            # Force the C locale so the status column is not translated and
            # the "enabled" match below keeps working on non-English systems.
            env=dict(os.environ, LC_ALL="C"),
        )
    except OSError:
        # gcc missing or not executable: report no flags rather than crash.
        return set()
    listing = result.stdout.decode(errors="replace")
    return {line.split()[0] for line in listing.splitlines() if "enabled" in line}


def get_o3_extra_flags():
    """Return the optimizer flags enabled at ``-O3`` but not at ``-O2``.

    Returns:
        A sorted list of flag names (as printed by ``gcc -Q``) that are
        reported enabled for ``-O3`` and not for ``-O2``.
    """
    enabled_o2 = _enabled_optimizer_flags("-O2")
    enabled_o3 = _enabled_optimizer_flags("-O3")
    return sorted(enabled_o3 - enabled_o2)
def _flag_slug(flag):
    """Return a file-name-friendly form of a GCC flag such as ``-fpeel-loops``."""
    # Strip only the leading "-f": a blanket replace("-f", "") also removes
    # interior matches (e.g. the "-f" of "-for" in
    # "-fversion-loops-for-strides"), mangling the generated file names.
    stem = flag[2:] if flag.startswith("-f") else flag
    return stem.replace("-", "_")


def main():
    """Find which extra ``-O3`` flags change code generation for a source file.

    Expects exactly one command-line argument: the C++ source file. Each flag
    that ``-O3`` enables on top of ``-O2`` is compiled as ``-O2 <flag>`` and
    its assembly is compared with the ``-O2`` and ``-O3`` baselines. Flags
    whose assembly matches neither baseline are built into binaries and
    benchmarked with ``hyperfine`` alongside plain ``-O2`` and ``-O3`` builds.
    All artifacts are written to the ``build`` directory.

    Exits with status 1 on a usage error or a missing source file, and with
    status 0 when no flag needs benchmarking.
    """
    import shlex

    if len(sys.argv) != 2:
        print("Usage: ./find_culprit.py <source.cpp>")
        sys.exit(1)

    source_file = sys.argv[1]
    if not os.path.exists(source_file):
        print(f"Error: {source_file} not found")
        sys.exit(1)

    base_name = os.path.splitext(os.path.basename(source_file))[0]
    print(f"Analyzing optimization flags for: {source_file}\n")

    o3_extra_flags = get_o3_extra_flags()
    print(f"Found {len(o3_extra_flags)} additional flags in O3 compared to O2")
    print(f"Flags: {', '.join(o3_extra_flags)}\n")

    os.makedirs("build", exist_ok=True)

    print("Generating baseline assemblies...")
    compile_to_asm(source_file, ["-O2"], "build/baseline_o2.s")
    compile_to_asm(source_file, ["-O3"], "build/baseline_o3.s")

    print("Analyzing flags by assembly comparison...")
    matches_o2 = []
    matches_o3 = []
    to_benchmark = []
    for flag in o3_extra_flags:
        asm_path = f"build/test_{_flag_slug(flag)}.s"
        compile_to_asm(source_file, ["-O2", flag], asm_path)
        if filecmp.cmp(asm_path, "build/baseline_o2.s", shallow=False):
            # The flag alone changes nothing relative to -O2.
            matches_o2.append(flag)
        elif filecmp.cmp(asm_path, "build/baseline_o3.s", shallow=False):
            # The flag alone reproduces the full -O3 output.
            matches_o3.append(flag)
        else:
            # Differs from both baselines: only a benchmark can tell.
            to_benchmark.append(flag)

    print("\nResults:")
    print(f" Same assembly with O2: {', '.join(matches_o2) if matches_o2 else 'None'}")
    print(f" Same assembly with O3: {', '.join(matches_o3) if matches_o3 else 'None'}")
    if not to_benchmark:
        print("\nNo flags need benchmarking.")
        sys.exit(0)
    print(f" Different assembly: {', '.join(to_benchmark)}")

    print(f"\nCompiling {len(to_benchmark) + 2} binaries for benchmarking...")
    o2_binary = f"build/{base_name}_o2"
    o3_binary = f"build/{base_name}_o3"
    compile_binary(source_file, ["-O2"], o2_binary)
    compile_binary(source_file, ["-O3"], o3_binary)
    flag_binaries = [
        (flag, f"build/{base_name}_o2_{_flag_slug(flag)}") for flag in to_benchmark
    ]
    for flag, binary in flag_binaries:
        compile_binary(source_file, ["-O2", flag], binary)

    print("\nRunning benchmarks...\n")
    # hyperfine hands each command string to a shell, so quote the paths to
    # keep source names containing spaces or metacharacters intact.
    hyperfine_cmd = [
        "hyperfine",
        "--warmup",
        "3",
        "--min-runs",
        "10",
        "--command-name",
        "O2",
        shlex.quote(f"./{o2_binary}"),
        "--command-name",
        "O3",
        shlex.quote(f"./{o3_binary}"),
    ]
    for flag, binary in flag_binaries:
        hyperfine_cmd.extend(
            ["--command-name", f"O2+{flag}", shlex.quote(f"./{binary}")]
        )
    subprocess.run(hyperfine_cmd, check=True)
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment