Skip to content

Instantly share code, notes, and snippets.

@Joshix-1
Last active April 19, 2024 21:22
Show Gist options
  • Select an option

  • Save Joshix-1/9e1c3481562a2873a94d12c1548dcc67 to your computer and use it in GitHub Desktop.

Select an option

Save Joshix-1/9e1c3481562a2873a94d12c1548dcc67 to your computer and use it in GitHub Desktop.
from __future__ import annotations
import ast
import base64
import io
import itertools
import lzma
import marshal
import sys
import types
from collections.abc import Callable, Iterable
from functools import partial
from pathlib import Path
from typing import NewType
# Distinct static type for byte strings that hold Python source code, so type
# checkers can tell generated code apart from arbitrary payload bytes.
# At runtime a PythonCode value is a plain ``bytes`` object.
PythonCode = NewType("PythonCode", bytes)
def encode_bytes(data: bytes) -> PythonCode:
    """Return the shortest Python expression that evaluates to *data*.

    Three spellings are produced, in this order: a base85 string passed to
    ``b85decode``, a base64 string passed to ``b64decode``, and the plain
    ``repr`` bytes literal. When several share the minimum length, the one
    listed first is returned.
    """
    b85_call = (
        b"__import__('base64').b85decode" + b"('" + base64.b85encode(data) + b"')"
    )
    b64_call = (
        b"__import__('base64').b64decode" + b"('" + base64.b64encode(data) + b"')"
    )
    literal = repr(data).encode()
    # min() returns the first of equally short candidates, so order matters.
    return PythonCode(min((b85_call, b64_call, literal), key=len))
def compress_bytes(data: bytes) -> PythonCode:
    """Return the shortest Python expression found that rebuilds *data*.

    The uncompressed ``repr`` literal is the baseline. Each compressor below
    is then tried, with its output embedded through ``encode_bytes`` and
    wrapped in the matching decompress call. A compressed form is only chosen
    when it is strictly shorter than every candidate before it.
    """
    compressors = (
        (__import__("bz2").compress, b"__import__('bz2').decompress"),
        (__import__("gzip").compress, b"__import__('gzip').decompress"),
        (__import__("lzma").compress, b"__import__('lzma').decompress"),
        (partial(__import__("zlib").compress, level=9), b"__import__('zlib').decompress"),
        (partial(lzma.compress, preset=9 | lzma.PRESET_EXTREME), b"__import__('lzma').decompress"),
    )
    candidates = [repr(data).encode()]
    candidates.extend(
        unpack_call + b"(" + encode_bytes(pack(data)) + b")"
        for pack, unpack_call in compressors
    )
    # min() keeps the earliest of equally short candidates.
    return PythonCode(min(candidates, key=len))
class _AnnAssignRewriter(ast.NodeTransformer):
    """Rewrite ``target: T = value`` to ``target = value`` outside class bodies."""

    def __init__(self) -> None:
        # True while the innermost enclosing scope is a class body.
        self._in_class_body = False

    def _visit_scope(self, node: ast.AST, in_class_body: bool) -> ast.AST:
        """Visit the children of *node* with the scope flag set, then restore it."""
        outer = self._in_class_body
        self._in_class_body = in_class_body
        try:
            return self.generic_visit(node)
        finally:
            self._in_class_body = outer

    def visit_ClassDef(self, node: ast.ClassDef) -> ast.AST:
        return self._visit_scope(node, True)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.AST:
        return self._visit_scope(node, False)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> ast.AST:
        return self._visit_scope(node, False)

    def visit_AnnAssign(self, node: ast.AnnAssign) -> ast.AST:
        # A bare annotation (no value) is kept as is. Class-level annotated
        # assignments are kept too: dataclasses, NamedTuple and similar tools
        # use the annotation to discover fields, so ``x: int = 0`` must not
        # degrade to ``x = 0`` there.
        if node.value is None or self._in_class_body:
            return node
        return ast.Assign(targets=[node.target], value=node.value, type_comment=None)


def _decorators_mention(func: ast.FunctionDef, name: str) -> bool:
    """Return True if any decorator expression of *func* uses the name *name*."""
    return any(
        isinstance(sub, ast.Name) and sub.id == name
        for decorator in func.decorator_list
        for sub in ast.walk(decorator)
    )


def _drop_shadowed_functions(body: list[ast.stmt]) -> list[ast.stmt]:
    """Drop a ``def`` that is immediately redefined by a same-named ``def``."""
    kept = [body[0]]
    for stmt in body[1:]:
        prev = kept[-1]
        if (
            isinstance(stmt, ast.FunctionDef)
            and isinstance(prev, ast.FunctionDef)
            and prev.name == stmt.name
            # ``@x.setter`` style decorators need the previous definition.
            and not _decorators_mention(stmt, prev.name)
        ):
            kept[-1] = stmt
        else:
            kept.append(stmt)
    return kept


def minify_code(code: bytes, filename: str) -> bytes:
    """Minify Python source code.

    The source is parsed, simplified on the AST and unparsed again:

    * annotated assignments with a value become plain assignments, except in
      class bodies, where the annotation can define a field;
    * a function that is directly followed by a same-named function is
      dropped, unless the later one's decorators refer to that name;
    * parameter annotations, return annotations, type comments and type
      parameters are removed;
    * docstrings of modules, classes and functions are removed; a body that
      becomes empty is replaced by ``...``.

    Args:
        code: The source code to minify.
        filename: File name passed to ``compile`` (used in error messages).

    Returns:
        The minified source, encoded as bytes.

    Raises:
        SyntaxError: If *code* is not valid Python.
    """
    tree: ast.AST = compile(
        code,
        filename,
        "exec",
        flags=annotations.compiler_flag | ast.PyCF_ONLY_AST,
        dont_inherit=True,
    )
    tree = ast.fix_missing_locations(_AnnAssignRewriter().visit(tree))

    annotation_fields = ("type_param", "type_params", "returns", "type_comment", "annotation")
    docstring_owners = (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)

    # One pass is enough: ast.walk queues a node's children before yielding
    # the node, so mutating the node afterwards cannot disturb the traversal.
    for node in ast.walk(tree):
        if isinstance(node, ast.AnnAssign):
            continue  # already handled by _AnnAssignRewriter
        body = getattr(node, "body", None)
        # Lambda and IfExp also have a ``body``, but it is a single expression.
        if body and isinstance(body, list):
            node.body = _drop_shadowed_functions(body)
        for field in annotation_fields:
            if getattr(node, field, None):
                setattr(node, field, None)
        if isinstance(node, docstring_owners):
            # Loop so that a second leading string does not become the new
            # docstring once the first one is gone.
            while ast.get_docstring(node):
                # ast.Ellipsis was removed in Python 3.14; Constant(...) is
                # the node it used to create. It is deliberately not wrapped
                # in ast.Expr so that it unparses on the header line.
                node.body = node.body[1:] or [ast.Constant(value=...)]

    return ast.unparse(ast.fix_missing_locations(tree)).encode()
def _strip_line_tables(code_obj: types.CodeType) -> types.CodeType:
    """Return *code_obj* with its own and every nested ``co_linetable`` emptied."""
    return code_obj.replace(
        co_linetable=b"",
        co_consts=tuple(
            _strip_line_tables(const) if isinstance(const, types.CodeType) else const
            for const in code_obj.co_consts
        ),
    )


def compress_code(code: bytes, filename: str = "__main__") -> PythonCode:
    """Compress Python code to (shorter) Python code.

    The code is minified first. The result is the shortest of these
    candidates, with earlier candidates winning ties:

    1. the minified source itself;
    2. ``exec(<compressed minified source>)``;
    3. ``exec(marshal.loads(<compressed code object>))`` for the current
       marshal format version and then every older one.

    NOTE: the marshal candidates embed bytecode, which is specific to the
    Python version that produced it, so such output is only expected to run
    on the interpreter version used for compression.

    Args:
        code: The source code to compress.
        filename: File name used when compiling *code*.

    Returns:
        Python code that behaves like *code* when executed.

    Raises:
        SyntaxError: If *code* is not valid Python.
    """
    source = minify_code(code, filename=filename)
    candidates: list[bytes] = [
        source,
        b"exec(" + compress_bytes(PythonCode(source)) + b")",
    ]

    # Line tables only matter for tracebacks and debugging; drop them from
    # the module code object and from every code object nested inside it.
    compiled: types.CodeType = _strip_line_tables(
        compile(
            source,
            filename,
            "exec",
            flags=annotations.compiler_flag,
            dont_inherit=True,
            optimize=2,
        )
    )

    loads = b"__import__('marshal').loads"
    # Current format first (what a bare marshal.dumps() uses), then older ones.
    for version in (marshal.version, *range(marshal.version)):
        payload = marshal.dumps(compiled, version)
        candidates.append(b"exec(" + loads + b"(" + compress_bytes(payload) + b"))")

    # min() keeps the earliest of equally short candidates.
    return PythonCode(min(candidates, key=len))
def main(output: io.BufferedIOBase | None = None) -> str | int:
    """Compress the files named on the command line and write the result.

    With zero or one argument, the compressed code of that file (or of this
    script itself when no argument is given) is written, followed by a
    newline. With several arguments, a small script is written that defines a
    helper ``w`` and calls it once per file to recreate each file, with its
    compressed content, at the path given on the command line.

    Args:
        output: Binary stream to write to. Defaults to ``sys.stdout.buffer``.

    Returns:
        ``0``, suitable as a process exit status.
    """
    if output is None:
        # Resolved at call time: a ``sys.stdout.buffer`` default would be
        # evaluated on import and fail if stdout is None or has no buffer.
        output = sys.stdout.buffer

    args = sys.argv[1:]
    if len(args) <= 1:
        path = Path(args[0]) if args else Path(__file__)
        output.write(compress_code(path.read_bytes(), path.name) + b"\n")
        output.flush()
        return 0

    # Helper used by the generated script; 511 == 0o777, then parents=True
    # and exist_ok=True passed positionally to keep the output short.
    output.write(
        b"def w(f, c):\n"
        b" from pathlib import Path\n"
        b" (p:=Path(f)).parent.mkdir(511,1,1)\n"
        b" p.write_bytes(c)\n"
        b"\n"
    )
    for file in args:
        path = Path(file)
        packed = compress_code(path.read_bytes(), path.name)
        # Build the whole call first so a failing read or compile never
        # leaves a half-written ``w(`` line in the output.
        output.write(
            b"w(" + repr(file).encode() + b"," + repr(packed).encode() + b")\n"
        )
    output.flush()
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
# Location of the compressor script to run.
file=~/code/compress_python/compress_code.py
# Variant 1: run the script (mounted read-only at /cpd) with Python 3.10.0 in a
# podman container without network access; ./test.py, ./examples (read-only)
# and ./typed_stream (read-only) are mounted into the container.
run="podman run --network=none -it --rm -v ./test.py:/test.py -v ./examples/:/examples:ro -v ./typed_stream/:/typed_stream:ro -v $file:/cpd:ro docker.io/library/python:3.10.0 python /cpd"
# Variant 2: run the script with the local python3. This assignment overrides
# the one above; remove or comment it out to use the container instead.
run="python3 $file"
# $run is intentionally unquoted so that it splits into command and arguments.
# Write the compressor output for typed_stream/*py to test.py ...
$run typed_stream/*py > test.py
# ... append the output for examples/*py ...
$run examples/*py >> test.py
# ... and finally compress test.py itself into ts.py.
$run test.py > ts.py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment