Skip to content

Instantly share code, notes, and snippets.

@Ostrichbeta
Created April 14, 2026 07:12
Show Gist options
  • Select an option

  • Save Ostrichbeta/a2b5b3deb5a1deed0e5faa1b8eb9d44a to your computer and use it in GitHub Desktop.

Select an option

Save Ostrichbeta/a2b5b3deb5a1deed0e5faa1b8eb9d44a to your computer and use it in GitHub Desktop.
Unicode Range Filter for CSS
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import pathlib
import re
import shutil
import sys
from typing import List, Sequence, Tuple
# A contiguous run of code points stored as (start, end), both ends inclusive.
Range = Tuple[int, int]
# Largest code point value that validate_range accepts.
UNICODE_MAX = 0x10FFFF
# Matches one @font-face rule: the at-keyword, optional whitespace, then
# everything from "{" up to the first "}" (non-greedy; DOTALL lets it span lines).
FONT_FACE_BLOCK_RE = re.compile(r'@font-face\s*\{.*?\}', re.IGNORECASE | re.DOTALL)
# Matches a unicode-range declaration and exposes three named groups:
#   indent - spaces/tabs immediately before the property name
#   value  - the text after the colon, taken lazily up to the terminator
#   end    - optional whitespace plus ";", or the whitespace that sits right
#            before a closing "}" (the brace itself is only looked at, not consumed)
UNICODE_RANGE_PROP_RE = re.compile(
r'(?P<indent>[ \t]*)unicode-range\s*:\s*(?P<value>.*?)(?P<end>\s*;|\s*(?=\}))',
re.IGNORECASE | re.DOTALL,
)
def parse_args() -> argparse.Namespace:
    """Define the command-line interface and parse the process arguments.

    Returns:
        A namespace carrying ``allowed_ranges`` (the raw range string),
        ``paths`` (one or more path strings) and ``no_backup`` (True only
        when ``--no-backup`` was passed).
    """
    cli = argparse.ArgumentParser(
        description="Filter unicode-range in CSS @font-face blocks."
    )
    # Each entry is (names, keyword options); positionals are registered in
    # the order they must appear on the command line.
    argument_specs = (
        (
            ("allowed_ranges",),
            {"help": "Allowed Unicode ranges, e.g. 'U+A5, U+4E00-9FFF, U+3040-309F'"},
        ),
        (
            ("paths",),
            {
                "nargs": "+",
                "help": "CSS files and/or directories containing CSS files",
            },
        ),
        (
            ("--no-backup",),
            {
                "action": "store_true",
                "help": "Do not create .bak backup files before overwriting",
            },
        ),
    )
    for names, options in argument_specs:
        cli.add_argument(*names, **options)
    return cli.parse_args()
def parse_hex(s: str) -> int:
    """Convert a string of hexadecimal digits to an integer.

    Surrounding whitespace is ignored. Only the ASCII characters 0-9, a-f and
    A-F are accepted; a bare ``int(s, 16)`` would also let through signs, a
    ``0x`` prefix, underscore separators and non-ASCII digits, none of which
    belong in a unicode-range code point.

    Args:
        s: Text holding the hexadecimal digits.

    Returns:
        The integer value of the digits.

    Raises:
        ValueError: If ``s`` is empty or contains any non-hex character.
    """
    digits = s.strip()
    if not re.fullmatch(r"[0-9A-Fa-f]+", digits):
        raise ValueError(f"Invalid hexadecimal value: {s!r}")
    return int(digits, 16)
def wildcard_to_range(hex_part: str) -> Range:
    """Expand a wildcard pattern such as ``4??`` into its (lowest, highest) pair.

    Every ``?`` stands for any hex digit, so the lower bound substitutes ``0``
    for each one and the upper bound substitutes ``F``.

    Args:
        hex_part: Hex digits and ``?`` characters, without the ``U+`` prefix.

    Returns:
        The inclusive (start, end) code points covered by the pattern.
    """
    lowest, highest = (int(hex_part.replace("?", fill), 16) for fill in "0F")
    return (lowest, highest)
def validate_range(start: int, end: int, token_for_error: str = "") -> Range:
    """Check that a (start, end) pair is an ordered range inside 0..UNICODE_MAX.

    Args:
        start: First code point of the range.
        end: Last code point of the range.
        token_for_error: Original token text to quote in error messages; when
            empty, the numeric ``(start, end)`` pair is shown instead.

    Returns:
        ``(start, end)`` unchanged when the pair is valid.

    Raises:
        ValueError: If either bound is negative or above ``UNICODE_MAX``, or
            if ``start`` is greater than ``end``.
    """
    shown = token_for_error or (start, end)
    if not (0 <= start <= UNICODE_MAX and 0 <= end <= UNICODE_MAX):
        raise ValueError(f"Out-of-range Unicode value: {shown}")
    if end < start:
        raise ValueError(f"Range start > end: {shown}")
    return (start, end)
def parse_single_unicode_token(token: str) -> Range:
    """Parse one unicode-range token into an inclusive (start, end) pair.

    Three forms are understood, case-insensitively:

    - a single code point, e.g. ``U+A5``
    - an explicit interval, e.g. ``U+4E00-9FFF``
    - a wildcard, e.g. ``U+4??``, where ``?`` matches any hex digit

    Args:
        token: The token text; surrounding whitespace is ignored.

    Returns:
        The validated (start, end) range.

    Raises:
        ValueError: If the token is empty, lacks the ``U+`` prefix, mixes a
            wildcard with an explicit interval, has a ``?`` anywhere other
            than in a trailing run, has a missing interval side, contains
            non-hex text, or describes an invalid range.
    """
    token = token.strip().upper()
    if not token:
        raise ValueError("Empty unicode-range token")
    if not token.startswith("U+"):
        raise ValueError(f"Invalid token (missing U+): {token}")

    body = token[2:].strip()
    if not body:
        raise ValueError(f"Invalid token: {token}")

    if "?" in body:
        if "-" in body:
            raise ValueError(f"Wildcard and explicit range cannot mix: {token}")
        # Wildcards are only meaningful as a trailing run: "4?5" is not a
        # contiguous range, so expanding it to 405-4F5 would be wrong.
        if not re.fullmatch(r"[0-9A-F]*\?+", body):
            raise ValueError(f"Invalid wildcard token: {token}")
        return validate_range(*wildcard_to_range(body), token)

    if "-" in body:
        left, right = body.split("-", 1)
        if not left or not right:
            raise ValueError(f"Invalid explicit range token: {token}")
        return validate_range(parse_hex(left), parse_hex(right), token)

    value = parse_hex(body)
    return validate_range(value, value, token)
def merge_ranges(ranges: Sequence[Range]) -> List[Range]:
    """Sort ranges and fuse every overlapping or directly adjacent pair.

    Two ranges are fused when the later one starts no more than one code
    point past the end of the earlier one, so ``(1, 3)`` and ``(4, 6)``
    become ``(1, 6)``.

    Args:
        ranges: Inclusive (start, end) pairs in any order.

    Returns:
        A new list of ranges sorted by start; empty when ``ranges`` is empty.
    """
    ordered = sorted(ranges, key=lambda pair: (pair[0], pair[1]))
    if not ordered:
        return []

    merged: List[Range] = ordered[:1]
    for start, end in ordered[1:]:
        prev_start, prev_end = merged[-1]
        if start > prev_end + 1:
            # A real gap: this range opens a new entry.
            merged.append((start, end))
            continue
        merged[-1] = (prev_start, max(prev_end, end))
    return merged
def parse_unicode_range_list(text: str) -> List[Range]:
    """Parse a comma-separated unicode-range list into merged ranges.

    Pieces that are empty after trimming (for example from a trailing comma)
    are skipped rather than treated as errors.

    Args:
        text: Text such as ``"U+A5, U+4E00-9FFF"``.

    Returns:
        The parsed ranges after passing through ``merge_ranges``.

    Raises:
        ValueError: Propagated from ``parse_single_unicode_token`` for any
            malformed piece.
    """
    pieces = (piece.strip() for piece in text.split(","))
    return merge_ranges(
        [parse_single_unicode_token(piece) for piece in pieces if piece]
    )
def intersect_two_ranges(a: Range, b: Range) -> Range | None:
    """Return the overlap of two inclusive ranges, or None when they are disjoint.

    Args:
        a: First (start, end) range.
        b: Second (start, end) range.

    Returns:
        The shared (start, end) span, or ``None`` if no code point is in both.
    """
    overlap_start = max(a[0], b[0])
    overlap_end = min(a[1], b[1])
    if overlap_end < overlap_start:
        return None
    return (overlap_start, overlap_end)
def intersect_range_lists(left: Sequence[Range], right: Sequence[Range]) -> List[Range]:
    """Compute the code points present in both range lists.

    Both inputs are first normalised with ``merge_ranges``; the two sorted
    lists are then walked in step.

    Args:
        left: First collection of inclusive ranges.
        right: Second collection of inclusive ranges.

    Returns:
        The merged list of ranges common to both inputs.
    """
    lhs = merge_ranges(left)
    rhs = merge_ranges(right)
    overlaps: List[Range] = []
    li = ri = 0
    while li < len(lhs) and ri < len(rhs):
        common = intersect_two_ranges(lhs[li], rhs[ri])
        if common is not None:
            overlaps.append(common)
        # Advance the side whose current range ends first; the other range
        # may still overlap whatever comes next.
        if lhs[li][1] < rhs[ri][1]:
            li += 1
        else:
            ri += 1
    return merge_ranges(overlaps)
def format_range(r: Range) -> str:
    """Render one range in unicode-range syntax with uppercase hex digits.

    Args:
        r: Inclusive (start, end) pair.

    Returns:
        ``U+X`` when both ends are equal, otherwise ``U+X-Y``.
    """
    first, last = r
    text = f"U+{first:X}"
    if first != last:
        text += f"-{last:X}"
    return text
def format_range_list(ranges: Sequence[Range]) -> str:
    """Render ranges as a ``", "``-separated unicode-range value.

    Args:
        ranges: Inclusive (start, end) pairs, emitted in the given order.

    Returns:
        The joined text; an empty string when ``ranges`` is empty.
    """
    return ", ".join(map(format_range, ranges))
def replace_unicode_range_in_block(block: str, allowed: Sequence[Range]) -> str | None:
    """Restrict the unicode-range of one @font-face block to the allowed set.

    Only the first unicode-range declaration found in the block is rewritten.

    Args:
        block: Text of a single @font-face rule.
        allowed: Ranges that may remain in the declaration.

    Returns:
        ``block`` unchanged when it has no unicode-range declaration; the
        block with its declaration narrowed to the intersection otherwise;
        ``None`` when nothing in the declaration is allowed.

    Raises:
        ValueError: Propagated when the declared value cannot be parsed.
    """
    found = UNICODE_RANGE_PROP_RE.search(block)
    if found is None:
        return block

    declared = parse_unicode_range_list(found.group("value").strip())
    surviving = intersect_range_lists(declared, allowed)
    if not surviving:
        return None

    # Keep a trailing ";" only if the original declaration had one. When it
    # ended at "}" instead, the whitespace captured before the brace is dropped.
    terminator = ";" if ";" in found.group("end") else ""
    leading = found.group("indent") or ""
    replacement = f"{leading}unicode-range: {format_range_list(surviving)}{terminator}"
    return block[: found.start()] + replacement + block[found.end() :]
def filter_css_text(css_text: str, allowed: Sequence[Range]) -> tuple[str, int, int]:
    """Filter every @font-face block in a stylesheet against the allowed ranges.

    Args:
        css_text: Full stylesheet text.
        allowed: Ranges that may remain in unicode-range declarations.

    Returns:
        ``(new_text, kept, removed)``: the rewritten stylesheet, the number of
        @font-face blocks kept (rewritten or untouched), and the number
        replaced by an empty string because nothing in them was allowed.
    """
    tally = {"kept": 0, "removed": 0}

    def rewrite(found: re.Match[str]) -> str:
        updated = replace_unicode_range_in_block(found.group(0), allowed)
        if updated is None:
            tally["removed"] += 1
            return ""
        tally["kept"] += 1
        return updated

    filtered = FONT_FACE_BLOCK_RE.sub(rewrite, css_text)
    return filtered, tally["kept"], tally["removed"]
def make_backup(path: pathlib.Path) -> pathlib.Path:
    """Copy ``path`` next to itself as a backup without clobbering an older one.

    The first backup is named ``<name>.bak``. If that already exists (for
    example because the tool was run before), ``<name>.bak.1``,
    ``<name>.bak.2`` and so on are tried, so an earlier backup holding the
    pristine file is never overwritten by an already-filtered copy.

    Args:
        path: File to back up.

    Returns:
        The path of the backup that was written.
    """
    backup_path = path.with_name(path.name + ".bak")
    counter = 0
    while backup_path.exists():
        counter += 1
        backup_path = path.with_name(f"{path.name}.bak.{counter}")
    # copy2 also carries over file metadata such as timestamps.
    shutil.copy2(path, backup_path)
    return backup_path
def process_file(path: pathlib.Path, allowed: Sequence[Range], create_backup: bool) -> None:
    """Filter one CSS file in place and report the outcome on stderr.

    The file is read and written as UTF-8 with newline translation disabled,
    so existing line endings (LF or CRLF) are preserved exactly instead of
    being normalised to the platform default.

    Args:
        path: CSS file to rewrite.
        allowed: Ranges that may remain in unicode-range declarations.
        create_backup: When true, a backup copy is made before the file is
            overwritten.

    Raises:
        FileNotFoundError: If ``path`` is not an existing regular file.
        ValueError: Propagated when a unicode-range value cannot be parsed.
    """
    if not path.is_file():
        raise FileNotFoundError(f"File not found: {path}")

    # newline="" turns off universal-newline handling on read and the
    # "\n" -> os.linesep conversion on write.
    with path.open("r", encoding="utf-8", newline="") as source:
        original_text = source.read()

    new_text, kept_blocks, removed_blocks = filter_css_text(original_text, allowed)

    # The backup is taken only after filtering succeeded and before any write.
    backup_path = make_backup(path) if create_backup else None
    with path.open("w", encoding="utf-8", newline="") as target:
        target.write(new_text)

    if backup_path is not None:
        print(
            f"[OK] {path} overwritten | backup: {backup_path} | kept @font-face blocks: {kept_blocks}, removed: {removed_blocks}",
            file=sys.stderr,
        )
    else:
        print(
            f"[OK] {path} overwritten | kept @font-face blocks: {kept_blocks}, removed: {removed_blocks}",
            file=sys.stderr,
        )
def expand_input_paths(inputs: Sequence[str]) -> List[pathlib.Path]:
    """
    Expand input arguments into a flat list of CSS files.

    - file.css -> [file.css]
    - some_dir -> all CSS files directly inside some_dir, in sorted order
    - no recursion into subdirectories

    A path counts as CSS when its suffix is ``.css`` in any letter case; the
    same rule is applied to explicit files and to directory entries. Paths
    that resolve to the same file are returned only once. Non-CSS files and
    missing paths are skipped with a warning on stderr.

    Args:
        inputs: File and/or directory paths as given on the command line.

    Returns:
        The CSS files to process, in first-seen order.
    """
    result: List[pathlib.Path] = []
    seen: set[pathlib.Path] = set()

    def add_once(candidate: pathlib.Path) -> None:
        # De-duplicate on the resolved path but keep the path as the user
        # spelled it for reporting.
        resolved = candidate.resolve()
        if resolved not in seen:
            seen.add(resolved)
            result.append(candidate)

    for raw in inputs:
        path = pathlib.Path(raw)
        if path.is_dir():
            for child in sorted(path.iterdir()):
                # iterdir plus a suffix check instead of glob("*.css"), whose
                # case sensitivity follows the platform.
                if child.suffix.lower() == ".css" and child.is_file():
                    add_once(child)
        elif path.is_file():
            if path.suffix.lower() != ".css":
                print(f"[WARN] Skipping non-CSS file: {path}", file=sys.stderr)
                continue
            add_once(path)
        else:
            print(f"[WARN] Skipping missing path: {path}", file=sys.stderr)
    return result
def main() -> int:
    """Run the command-line tool.

    Returns:
        ``0`` when every file was processed, ``1`` when at least one file
        failed, ``2`` when the allowed-ranges argument is invalid or no CSS
        files were found.
    """
    args = parse_args()

    try:
        allowed = parse_unicode_range_list(args.allowed_ranges)
    except ValueError as exc:
        print(f"[ERROR] Invalid allowed-ranges input: {exc}", file=sys.stderr)
        return 2

    files = expand_input_paths(args.paths)
    if not files:
        print("[ERROR] No CSS files found to process.", file=sys.stderr)
        return 2

    make_backups = not args.no_backup
    error_count = 0
    for css_file in files:
        # One bad file must not stop the rest, so each failure is reported
        # and the loop carries on.
        try:
            process_file(css_file, allowed, create_backup=make_backups)
        except Exception as exc:
            error_count += 1
            print(f"[ERROR] {css_file}: {exc}", file=sys.stderr)
    return 1 if error_count else 0
# Run the tool only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment