Last active
March 16, 2026 06:29
-
-
Save hsupu/feacdda135332d847bd5e3ccaa3ee351 to your computer and use it in GitHub Desktop.
Script to fix Python venv launcher executables after the venv has been moved.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # 260316 | |
| # see https://stackoverflow.com/questions/35412392/how-can-i-use-setuptools-to-create-an-exe-launcher | |
| from __future__ import annotations | |
| import io | |
| import logging | |
| import os | |
| import re | |
| import sys | |
| import sysconfig | |
| import zipfile | |
# Module-level logger; its level is chosen via logging.basicConfig in the script entry point.
logger = logging.getLogger(__name__)
# True when running on Windows; selects the .exe launcher code path over the Unix script path.
IS_WINDOWS = sys.platform == 'win32'
def _read_uint(f, offset, size):
    """Read a little-endian unsigned integer of *size* bytes at absolute *offset* of *f*."""
    f.seek(offset)
    return int.from_bytes(f.read(size), 'little', signed=False)


def _pe_overlay_offset(f, filename):
    """Return the offset just past the last PE section's raw data, or None if *f* is not a PE file.

    Data appended after that offset (the "overlay") is where the launcher
    stores its shebang and zipped ``__main__.py``. *filename* is only used
    for log messages.
    """
    # 0:2 MZ
    f.seek(0)
    if f.read(2) != b'MZ':
        logger.error(f'Not a MZ file: {filename}')
        return None
    # 0x3C:4 e_lfanew, offset to the PE header
    pe_offset = _read_uint(f, 0x3C, 4)
    f.seek(pe_offset)
    if f.read(4) != b'PE\0\0':
        logger.error(f'Not a PE file: {filename}')
        return None
    # The COFF file header follows the 4-byte signature.
    coff = pe_offset + 4
    # 2:2 NumberOfSections, 16:2 SizeOfOptionalHeader
    number_of_sections = _read_uint(f, coff + 2, 2)
    size_of_optional_header = _read_uint(f, coff + 16, 2)
    # The section table starts after the 20-byte COFF header and the optional header.
    section_table = coff + 20 + size_of_optional_header
    overlay = 0
    for i in range(number_of_sections):
        # Each section header is 40 bytes: 16:4 SizeOfRawData, 20:4 PointerToRawData
        entry = section_table + i * 40
        size_of_raw_data = _read_uint(f, entry + 16, 4)
        pointer_to_raw_data = _read_uint(f, entry + 20, 4)
        overlay = max(overlay, pointer_to_raw_data + size_of_raw_data)
    return overlay


def _read_main_py(zip_data):
    """Return the decoded text of ``__main__.py`` stored in the zip archive *zip_data* (bytes)."""
    with zipfile.ZipFile(io.BytesIO(zip_data)) as zf:
        with zf.open('__main__.py') as main_py:
            return main_py.read().decode()


def _launcher_script(f, overlay, filename, regex_shebang, pyexe):
    """Return the embedded ``__main__.py`` text if the launcher must be regenerated, else None.

    Two overlay layouts are supported:

    * ``PK...``: the overlay is a bare zip; the shebang is inside ``__main__.py``.
    * ``#!...``: a shebang line, a newline, then the zip holding ``__main__.py``.

    None is returned (after logging) when the layout is unknown, the shebang
    does not match *regex_shebang*, or the shebang already points to *pyexe*.
    """
    f.seek(overlay)
    header = f.read(2)
    if header == b'PK':
        f.seek(overlay)
        content = _read_main_py(f.read())
        shebang_text = content
    elif header == b'#!':
        # The two header bytes are already consumed; read the rest of the line.
        shebang_text = '#!' + f.readline(1024).decode().rstrip()
        content = None  # the zip is only unpacked when an update is needed
    else:
        logger.error(f'Unknown header, fix_entrypoints.py is outdated?: {filename} {header}')
        return None

    match = regex_shebang.search(shebang_text)
    if not match:
        logger.error(f'shebang not matched our regex pattern, skipping: {filename} {shebang_text}')
        return None
    if match.group(1).casefold() == pyexe.casefold():
        logger.debug(f'no need to update: {filename}')
        return None
    if content is None:
        # The file position is right after the shebang line: the rest is the zip.
        content = _read_main_py(f.read())
    return content


def _scan_win32(bindir, regex_shebang, regex_entrypoint, pyexe):
    """Scan *bindir* for Windows ``.exe`` launchers whose shebang does not point to *pyexe*.

    Args:
        bindir: directory containing the launcher executables.
        regex_shebang: compiled pattern; group 1 is the interpreter path of a shebang.
        regex_entrypoint: compiled pattern passed on to ``_extract_spec_win32``.
        pyexe: path of the interpreter the launchers should use.

    Returns:
        A list of entry specification strings (``name = module:callable``),
        one per launcher that needs to be regenerated.

    A launcher that cannot be opened or whose embedded archive is corrupt is
    logged and skipped so that it does not abort the scan of the others.
    """
    interpreters = ('python.exe', 'pythonw.exe', 'python_d.exe', 'pythonw_d.exe')
    specs = []
    for filename in os.listdir(bindir):
        # Windows file names are case-insensitive, so compare in lower case.
        lowered = filename.lower()
        if not lowered.endswith('.exe') or lowered in interpreters:
            continue
        fullname = os.path.join(bindir, filename)
        logger.info(f'Open {fullname}')
        try:
            with open(fullname, 'rb') as f:
                overlay = _pe_overlay_offset(f, filename)
                if overlay is None:
                    continue
                logger.debug(f'Overlay offset: {overlay:x}')
                content = _launcher_script(f, overlay, filename, regex_shebang, pyexe)
        except (OSError, zipfile.BadZipFile, KeyError, UnicodeDecodeError) as err:
            logger.error(f'Cannot read launcher, skipping: {filename}: {err}')
            continue
        if content is None:
            continue
        logger.debug(content)
        spec = _extract_spec_win32(filename, fullname, content, regex_entrypoint)
        if spec:
            specs.append(spec)
    return specs
def _extract_spec_win32(filename, fullname, content, regex_entrypoint):
    """Build an entry specification from the ``__main__.py`` text of a Windows launcher.

    Args:
        filename: launcher file name, used only in log messages.
        fullname: launcher path; its base name without extension becomes the script name.
        content: text of the ``__main__.py`` extracted from the launcher.
        regex_entrypoint: compiled pattern matching ``from X import Y``
            (group 1 = module, group 2 = imported name).

    Returns:
        ``"<name> = <module>:<callable>"``, or None when no import line matches.
    """
    match = regex_entrypoint.search(content)
    if not match:
        logger.error(f'Entrypoint not matched. fix_entrypoints.py is outdated?: {filename}\n{content}')
        return None
    modulepath, varname = match.group(1), match.group(2)
    # The script may call an attribute of the imported object, e.g.
    # ``sys.exit(App.run())``; keep the full dotted callable in that case so
    # the regenerated launcher calls the same thing.
    match_call = re.search(
        rf'sys\.exit\(({re.escape(varname)}(?:\.\w+)+)\(\)\)',
        content,
    )
    callpath = match_call.group(1) if match_call else varname
    # splitext also copes with a name that has no extension.
    basename = os.path.splitext(os.path.basename(fullname))[0]
    return f"{basename} = {modulepath}:{callpath}"
# File names in a Unix venv bin directory that _scan_unix never treats as
# entry-point scripts: the interpreter itself and the activation helpers.
SKIP_UNIX = {
    'python',
    'python3',
    'activate',
    'activate.csh',
    'activate.fish',
    'activate.nu',
    'activate.ps1',
    'Activate.ps1',
    'activate_this.py',
    'deactivate.nu',
}
def _scan_unix_symlinks(bindir, pyexe):
    """Collect the ``python*`` symlinks in *bindir* that resolve to something other than *pyexe*.

    Only symlinks with an absolute target are considered. Returns a list of
    ``(link_path, current_target)`` tuples.
    """
    regex_python = re.compile(r'^python[0-9.]*$')
    stale = []
    for filename in os.listdir(bindir):
        link = os.path.join(bindir, filename)
        if not (regex_python.match(filename) and os.path.islink(link)):
            continue
        target = os.readlink(link)
        if not os.path.isabs(target):
            # A relative link (e.g. python3 -> python) needs no fix of its
            # own; fixing the link it points at is enough.
            continue
        if os.path.realpath(link) != os.path.realpath(pyexe):
            stale.append((link, target))
        else:
            logger.debug(f'symlink ok: {filename} -> {target}')
    return stale
def _scan_unix(bindir, regex_shebang):
    """Scan *bindir* for Python entry-point scripts whose shebang points outside *bindir*.

    Args:
        bindir: directory to scan.
        regex_shebang: compiled pattern; group 1 is the interpreter path of a shebang.

    Returns:
        A list of entry specification strings produced by ``_extract_spec_unix``.
    """
    regex_interpreter = re.compile(r'^python[0-9.]*$')
    found = []
    for filename in os.listdir(bindir):
        fullname = os.path.join(bindir, filename)
        if (
            not os.path.isfile(fullname)
            or filename in SKIP_UNIX
            or regex_interpreter.match(filename)
        ):
            continue

        try:
            with open(fullname, 'r', encoding='utf-8') as f:
                content = f.read(8192)
        except (UnicodeDecodeError, PermissionError):
            # Binary file or not readable: skip it.
            continue

        # Without a shebang naming a python interpreter it is not a pip entry point.
        match = regex_shebang.search(content) if content.startswith('#!') else None
        if not match:
            continue

        # A shebang pointing at a python inside bindir (e.g. .venv/bin/python3)
        # goes through the symlink there, so the script itself needs no change.
        if os.path.dirname(match.group(1)) == bindir:
            logger.debug(f'no need to update: {filename}')
            continue

        logger.info(f'Open {fullname}')
        spec = _extract_spec_unix(filename, fullname, content)
        if spec:
            found.append(spec)
    return found
# Matches the ``sys.exit(<callable>())`` line of a generated console script;
# group 1 is the (possibly dotted) callable.
_RE_SYS_EXIT = re.compile(r'sys\.exit\(([\w.]+)\(\)\)')


def _extract_spec_unix(filename, fullname, content):
    """Rebuild the entry specification of a Unix console script from its text.

    The script is recognised by its ``sys.exit(func())`` call; the module is
    then looked up from the matching ``from X import name`` line.

    Args:
        filename: script file name, used only in log messages.
        fullname: script path; its base name (minus a ``.py`` suffix) becomes the script name.
        content: the beginning of the script text.

    Returns:
        ``"<name> = <module>:<callable>"``, or None when the text does not
        look like a console script or the import cannot be found.
    """
    match_exit = _RE_SYS_EXIT.search(content)
    if not match_exit:
        logger.debug(f'Not a console_scripts entrypoint, skipping: {filename}')
        return None
    callpath = match_exit.group(1)

    # For a dotted callable such as ``cli.run`` only the first component is
    # imported (``from X import cli``). The spec keeps the dotted path after
    # the colon (``X:cli.run``) so the regenerated script performs the very
    # same import and call, whether ``cli`` is a submodule or an object.
    import_name = callpath.split('.', 1)[0]
    match_import = re.search(
        rf'^\s*from\s+(\S+)\s+import\s+{re.escape(import_name)}\s*$',
        content, re.MULTILINE,
    )
    if not match_import:
        logger.debug(f'Cannot find import for {import_name}, skipping: {filename}')
        return None
    modulepath = match_import.group(1)

    basename = os.path.basename(fullname)
    if basename.endswith('.py'):
        basename = basename[:-3]
    return f"{basename} = {modulepath}:{callpath}"
def main(args=None):
    """Find launchers and python symlinks that point to a stale interpreter and regenerate them.

    Args:
        args: namespace-like object, or None for all defaults. The optional
            attributes read from it are:

            * ``bindir``: scripts directory to scan (default: sysconfig scripts path).
            * ``python``: interpreter the scripts should use. When omitted, the
              python inside an explicitly given ``bindir`` is used if it
              exists, otherwise ``sys.executable``.
            * ``non_interactive``: skip the confirmation prompt.
            * ``dry_run``: only log what would be done.
    """
    bindir = getattr(args, 'bindir', None)
    bindir_set = bool(bindir)
    if bindir_set:
        bindir = os.path.abspath(bindir)
    else:
        bindir = sysconfig.get_path('scripts')

    pyexe = getattr(args, 'python', None)
    if pyexe:
        pyexe = os.path.abspath(pyexe)
    else:
        pyexe = None
        if bindir_set:
            candidate = os.path.join(bindir, 'python.exe' if IS_WINDOWS else 'python')
            if os.path.isfile(candidate):
                pyexe = candidate
        if pyexe is None:
            # Always end up with a usable interpreter path; the scanners
            # below cannot work with None.
            pyexe = sys.executable

    if IS_WINDOWS:
        # A path containing spaces is written quoted in the shebang
        # (#!"C:\Program Files\...\python.exe"); the quotes are optional
        # here and excluded from group 1.
        regex_shebang = re.compile(r'^#!\s*"?(.+\\python(?:w)?\.exe)"?\s*$', re.MULTILINE)
    else:
        regex_shebang = re.compile(r'^#!\s*(.+/python[0-9.]*)\s*$', re.MULTILINE)

    # scan
    specs = []
    symlinks = []
    if IS_WINDOWS:
        regex_entrypoint = re.compile(r'^\s*from (\S+) import (\S+)\s*$', re.MULTILINE)
        specs = _scan_win32(bindir, regex_shebang, regex_entrypoint, pyexe)
    else:
        symlinks = _scan_unix_symlinks(bindir, pyexe)
        specs = _scan_unix(bindir, regex_shebang)

    if not specs and not symlinks:
        logger.info('No entrypoints need to be fixed.')
        return

    # confirm
    interactive = not getattr(args, 'non_interactive', False)
    logger.info(f'Target python: {pyexe}')
    logger.info(f'Scripts dir: {bindir}')
    if symlinks:
        logger.info(f'Symlinks to fix ({len(symlinks)}):')
        for fullname, target in symlinks:
            logger.info(f' {os.path.basename(fullname)} -> {target} => {pyexe}')
    if specs:
        logger.info(f'Entrypoints to fix ({len(specs)}):')
        for spec in specs:
            logger.info(f' {spec}')

    dryrun = getattr(args, 'dry_run', False)
    if dryrun:
        return

    if interactive:
        answer = input('\nProceed? [y/N] ').strip().lower()
        if answer not in ('y', 'yes'):
            logger.warning('Aborted.')
            return

    # execute
    for fullname, target in symlinks:
        os.remove(fullname)
        os.symlink(pyexe, fullname)
        logger.warning(f'symlink: {os.path.basename(fullname)} -> {pyexe}')

    # Imported lazily: only needed once something actually has to be regenerated.
    from pip._vendor.distlib.scripts import ScriptMaker

    sm = ScriptMaker(
        source_dir=None,  # None: scripts are built from entry specifications
        target_dir=bindir,  # directory the scripts are written to
        add_launchers=IS_WINDOWS,  # True to create .exe, False to create .py
    )
    sm.executable = pyexe
    if IS_WINDOWS:
        exe_dir, exe_name = os.path.split(pyexe)
        if exe_name.lower() == 'pythonw.exe':
            # python.exe and pythonw.exe differ on Windows: the latter has no
            # console window attached. Swap the file name only, so that a
            # directory containing "pythonw" in its name is left untouched.
            sm.executable = os.path.join(exe_dir, 'python.exe')
        if ' ' in sm.executable and not sm.executable.startswith('"'):
            # An explicitly assigned executable is written into the shebang
            # as-is, so a path with spaces has to be quoted here.
            sm.executable = f'"{sm.executable}"'
    # create only the main variant (not the one with X.Y suffix)
    sm.variants = [""]
    for spec in specs:
        logger.warning(spec)
        # an entry specification string, just like in pyproject.toml
        sm.make(spec)
if __name__ == '__main__':
    import argparse

    # Command-line entry point: parse the options, set the log level from the
    # number of -v flags, then hand the parsed namespace to main().
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-y', '--non-interactive', action='store_true', help='skip confirmation prompt')
    parser.add_argument('-n', '--dry-run', '--dryrun', action='store_true', help='show what would be done without executing')
    parser.add_argument('-p', '--python', help='target python executable path (default: sys.executable)')
    parser.add_argument('-d', '--bindir', help='scripts directory to scan (default: sysconfig scripts path)')
    args = parser.parse_args()

    # no flag -> WARNING, -v -> INFO, -vv or more -> DEBUG
    log_levels = (logging.WARNING, logging.INFO, logging.DEBUG)
    logging.basicConfig(level=log_levels[min(args.verbose, len(log_levels) - 1)])

    main(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.