from __future__ import annotations import ast import base64 import io import itertools import lzma import marshal import sys import types from collections.abc import Callable, Iterable from functools import partial from pathlib import Path from typing import NewType PythonCode = NewType("PythonCode", bytes) def encode_bytes(data: bytes) -> PythonCode: ENCODINGS: list[tuple[Callable[[bytes], bytes], bytes]] = [ (base64.b85encode, b"__import__('base64').b85decode"), (base64.b64encode, b"__import__('base64').b64decode"), (lambda b: repr(b).encode(), b""), ] encodes: list[PythonCode] = [ PythonCode(decode + b"('" + encode(data) + b"')" if decode else encode(data)) for encode, decode in ENCODINGS ] encodes.sort(key=len) return encodes[0] def compress_bytes(data: bytes) -> PythonCode: """Compress bytes to python code.""" COMPRESSIONS: list[tuple[Callable[[bytes], bytes], bytes]] = [ (__import__("bz2").compress, b"__import__('bz2').decompress"), (__import__("gzip").compress, b"__import__('gzip').decompress"), (__import__("lzma").compress, b"__import__('lzma').decompress"), (partial(__import__("zlib").compress, level=9), b"__import__('zlib').decompress"), (partial(lzma.compress, preset=9 | lzma.PRESET_EXTREME), b"__import__('lzma').decompress"), ] min_code = PythonCode(repr(data).encode()) min_length = len(min_code) for compress, decompress in COMPRESSIONS: result = decompress + b"(" + encode_bytes(compress(data)) + b")" if len(result) < min_length: min_length = len(result) min_code = PythonCode(result) return min_code def minify_code(code: bytes, filename: str) -> bytes: """Minify python code.""" tree: ast.AST = compile( code, filename, "exec", flags=annotations.compiler_flag | ast.PyCF_ONLY_AST, dont_inherit=True, ) class RewriteAssign(ast.NodeTransformer): def visit_AnnAssign(self, node: ast.AnnAssign) -> ast.AST | None: if not node.value: return node # important for dataclasses! return ast.Assign( targets=[node.target], value=node.value, type_comment=None, ) tree = ast.fix_missing_locations(RewriteAssign().visit(tree)) def update_tree() -> bool: for node in ast.walk(tree): if isinstance(node, ast.AnnAssign): continue # already handled if getattr(node, "body", None) and isinstance(node.body, Iterable): body = iter(node.body) node.body = [next(body)] for n in body: prev = node.body[-1] if ( isinstance(n, ast.FunctionDef) and isinstance(prev, ast.FunctionDef) and prev.name == n.name and prev.name not in [ _.id for _ in itertools.chain.from_iterable(ast.walk(x) for x in n.decorator_list) if isinstance(_, ast.Name) ] ): node.body[-1] = n else: node.body.append(n) for attr in ( "type_param", "type_params", "returns", "type_comment", "annotation" ): if getattr(node, attr, None): setattr(node, attr, None) return True if not isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)): continue if ast.get_docstring(node): node.body = node.body[1:] or [ast.Ellipsis()] return False while update_tree(): ast.fix_missing_locations(tree) return ast.unparse(tree).encode() def compress_code(code: bytes, filename = "__main__") -> PythonCode: """Compress python code to python code.""" code = minify_code(code, filename=filename) min_code = PythonCode(code) min_length = len(min_code) compressed_code = b"exec(" + compress_bytes(min_code) + b")" if len(compressed_code) < min_length: min_code = PythonCode(compressed_code) min_length = len(compressed_code) compiled: types.CodeType = compile( code, filename, "exec", flags=annotations.compiler_flag, dont_inherit=True, optimize=2, ).replace(co_linetable=b"") compiled = compiled.replace(co_consts=tuple( (c.replace(co_linetable=b"") if isinstance(c, types.CodeType) else c) for c in compiled.co_consts )) def m_dumps(o: types.CodeType, version: int) -> bytes: return marshal.dumps(o, version) CONVERTS: list[tuple[Callable[[types.CodeType], bytes], bytes, int]] = [ (marshal.dumps, b"__import__('marshal').loads"), *( (partial(m_dumps, version=v), b"__import__('marshal').loads") for v in range(marshal.version) ), ] for convert, deconvert in CONVERTS: result = b"exec(" + deconvert + b"(" + compress_bytes(convert(compiled)) + b"))" if len(result) < min_length: min_length = len(result) min_code = PythonCode(result) return min_code def main(output: io.BytesIO = sys.stdout.buffer) -> str | int: args = sys.argv[1:] if len(args) <= 1: path = Path(args[0]) if args else Path(__file__) output.write( compress_code(path.read_bytes(), path.name) ) output.write(b"\n") output.flush() return 0 output.write( b"def w(f, c):\n" b" from pathlib import Path\n" b" (p:=Path(f)).parent.mkdir(511,1,1)\n" b" p.write_bytes(c)\n" b"\n" ) for file in args: output.write(b"w(") output.write(repr(file).encode()) output.write(b",") path = Path(file) output.write(repr(compress_code(path.read_bytes(), path.name)).encode()) output.write(b")\n") output.flush() return 0 if __name__ == "__main__": sys.exit(main())