Skip to content

Instantly share code, notes, and snippets.

@donnaken15
Last active April 28, 2026 00:22
Show Gist options
  • Select an option

  • Save donnaken15/11f0b72f4d1f71c2edf74e9bf5a0c420 to your computer and use it in GitHub Desktop.

Select an option

Save donnaken15/11f0b72f4d1f71c2edf74e9bf5a0c420 to your computer and use it in GitHub Desktop.
gm8.1 gml minifier | claude crap
// gml81-min.js — GML 8.1 minifier
// Usage: node gml81-min.js <input.gml> [output.gml]
// node gml81-min.js --stdin (reads stdin, writes stdout)
// node gml81-min.js --test (runs built-in sanity checks)
//
// Flags (prepend to input arg):
// --no-alias skip keyword alias substitution (and/or/not/begin/end/true/false)
// --no-numbers skip float trailing-zero trimming
//
// thanks clanker
'use strict';
import {'readFileSync' as read,'writeFileSync' as write, 'existsSync' as exists} from 'fs';
// ─── Token types ──────────────────────────────────────────────────────────────
// Token type tags. Every token produced by tokenize() is a plain object
// { t: <one of these tags>, v: <token text> }; later passes compare `t`
// against these constants rather than against the raw letters.
const T = {
IDENT: 'I', // identifiers and keywords
NUMBER: 'N', // numeric literals (decimal, hex $FF)
STRING: 'S', // "..." or '...'
OP: 'O', // operators (single and multi-char)
PUNCT: 'P', // (){}[];,.: plus any character the tokenizer does not recognise
};
// ─── Tokenizer ────────────────────────────────────────────────────────────────
/**
 * Split GML 8.1 source text into a flat list of tokens.
 *
 * Whitespace and comments (`// ...` and `/* ... *​/`) are discarded. Every
 * other character ends up in exactly one token, so concatenating the token
 * values reproduces the code minus whitespace and comments.
 *
 * Notes on GML 8.1 specifics:
 *  - String literals have NO escape sequences: a backslash is an ordinary
 *    character and the literal ends at the next matching quote. Treating
 *    `\` as an escape would make `"C:\"` swallow the code that follows it.
 *  - A `#define name` header (script separator in exported .gml files) must
 *    stay on a line of its own, so it is emitted as a single PUNCT token whose
 *    value carries its own line breaks.
 *
 * @param {string} src - GML source text.
 * @returns {{t: string, v: string}[]} Tokens tagged with a `T` type.
 */
function tokenize(src) {
  const TWO_CHAR_OPS = new Set([
    '==', '!=', '<>', '<=', '>=', '&&', '||', '^^',
    '+=', '-=', '*=', '/=', '|=', '&=', '^=', '<<', '>>',
  ]);
  const ONE_CHAR_OPS = '+-*/%=<>!&|^~';
  const HEX_DIGIT = /[0-9a-fA-F]/;
  const IDENT_CHAR = /[a-zA-Z0-9_]/;
  const isDigit = (ch) => ch >= '0' && ch <= '9';
  const isIdentStart = (ch) =>
    (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '_';

  const toks = [];
  const len = src.length;

  // True when only spaces/tabs separate `pos` from the start of its line.
  const startsLine = (pos) => {
    let k = pos - 1;
    while (k >= 0 && (src[k] === ' ' || src[k] === '\t')) k--;
    return k < 0 || src[k] === '\n' || src[k] === '\r';
  };

  let i = 0;
  while (i < len) {
    const c = src[i];

    // whitespace — discard
    if (c === ' ' || c === '\t' || c === '\r' || c === '\n') {
      i++;
      continue;
    }

    // line comment — discard up to (not including) the newline
    if (c === '/' && src[i + 1] === '/') {
      const newline = src.indexOf('\n', i);
      i = newline === -1 ? len : newline;
      continue;
    }

    // block comment — discard; an unterminated comment runs to end of input
    if (c === '/' && src[i + 1] === '*') {
      const close = src.indexOf('*/', i + 2);
      i = close === -1 ? len : close + 2;
      continue;
    }

    // string literal, either quote style; no escape processing in GML 8.1
    if (c === '"' || c === "'") {
      const close = src.indexOf(c, i + 1);
      const end = close === -1 ? len : close + 1;
      toks.push({ t: T.STRING, v: src.slice(i, end) });
      i = end;
      continue;
    }

    // `#define name` header: keep the whole line as one token, on its own line
    if (
      c === '#' &&
      src.startsWith('#define', i) &&
      (src[i + 7] === ' ' || src[i + 7] === '\t') &&
      startsLine(i)
    ) {
      let j = i;
      while (j < len && src[j] !== '\n' && src[j] !== '\r') j++;
      const header = src.slice(i, j).trimEnd();
      const lead = toks.length > 0 ? '\n' : '';
      toks.push({ t: T.PUNCT, v: `${lead}${header}\n` });
      i = j;
      continue;
    }

    // hex literal $FFFF
    if (c === '$') {
      let j = i + 1;
      while (j < len && HEX_DIGIT.test(src[j])) j++;
      toks.push({ t: T.NUMBER, v: src.slice(i, j) });
      i = j;
      continue;
    }

    // decimal / float literal; the dot is only consumed when a digit follows
    if (isDigit(c)) {
      let j = i + 1;
      while (j < len && isDigit(src[j])) j++;
      if (src[j] === '.' && isDigit(src[j + 1])) {
        j += 2;
        while (j < len && isDigit(src[j])) j++;
      }
      toks.push({ t: T.NUMBER, v: src.slice(i, j) });
      i = j;
      continue;
    }

    // identifier / keyword
    if (isIdentStart(c)) {
      let j = i + 1;
      while (j < len && IDENT_CHAR.test(src[j])) j++;
      toks.push({ t: T.IDENT, v: src.slice(i, j) });
      i = j;
      continue;
    }

    // two-char operators (checked before single-char ones)
    const two = src.slice(i, i + 2);
    if (TWO_CHAR_OPS.has(two)) {
      toks.push({ t: T.OP, v: two });
      i += 2;
      continue;
    }

    // single-char operators
    if (ONE_CHAR_OPS.includes(c)) {
      toks.push({ t: T.OP, v: c });
      i++;
      continue;
    }

    // punctuation, and any unknown character — kept so nothing disappears
    toks.push({ t: T.PUNCT, v: c });
    i++;
  }
  return toks;
}
// ─── Keyword alias substitution ───────────────────────────────────────────────
// These are semantically equivalent shorter forms in GML 8.1.
// true/false → 1/0 saves chars; begin/end → {/} saves chars.
// and/or/not → &&/||/! saves 1 char each
// Keyword → replacement-token table used by applyAliases().
// Built on a null prototype so that looking up an arbitrary identifier
// (e.g. `constructor`, `toString`, `valueOf`) can never resolve to a member
// inherited from Object.prototype and be mistaken for an alias.
const ALIASES = Object.assign(Object.create(null), {
  and: { t: T.OP, v: '&&' },
  or: { t: T.OP, v: '||' },
  not: { t: T.OP, v: '!' },
  begin: { t: T.PUNCT, v: '{' },
  end: { t: T.PUNCT, v: '}' },
  true: { t: T.NUMBER, v: '1' },
  false: { t: T.NUMBER, v: '0' },
});
/**
 * Replace keyword identifiers with their shorter equivalent tokens.
 *
 * Only OWN keys of ALIASES count: a plain truthy lookup would also match
 * inherited members such as `constructor` or `toString`, turning a harmless
 * identifier of that name into a non-token value (emitted as "undefined").
 *
 * @param {{t: string, v: string}[]} toks - Token list.
 * @returns {{t: string, v: string}[]} New list; non-alias tokens are reused as-is.
 */
const applyAliases = (toks) =>
  toks.map((tok) =>
    (tok.t === T.IDENT && Object.hasOwn(ALIASES, tok.v) ? ALIASES[tok.v] : tok));
// ─── Float trimming: 1.00000 → 1, 1.50000 → 1.5 ─────────────────────────────
/**
 * Shorten float literals by removing redundant trailing zeros
 * (`1.500` → `1.5`, `1.000` → `1`). Tokens that are not NUMBERs, or that
 * contain no decimal point, are passed through untouched.
 *
 * @param {{t: string, v: string}[]} toks - Token list.
 * @returns {{t: string, v: string}[]} New list with trimmed float tokens.
 */
function trimNumbers(toks) {
  const trimmed = [];
  for (const tok of toks) {
    const isFloat = tok.t === T.NUMBER && tok.v.includes('.');
    if (!isFloat) {
      trimmed.push(tok);
      continue;
    }
    // First drop the zeros after the fraction, then a dot left dangling.
    const withoutZeros = tok.v.replace(/(\.\d*?)0+$/, '$1');
    const text = withoutZeros.replace(/\.$/, '');
    trimmed.push({ t: T.NUMBER, v: text });
  }
  return trimmed;
}
// ─── Semicolon removal ────────────────────────────────────────────────────────
// Drop semicolons everywhere EXCEPT inside for() header parens,
// where they separate init; cond; step.
// NOTE: no special handling exists for #define lines — a semicolon token there is dropped like any other.
/**
 * Remove `;` tokens, except those inside the parenthesised header of a
 * `for` statement, where they separate init / condition / step.
 *
 * The header is recognised as the `(` immediately following a `for`
 * identifier; nested parentheses inside it are tracked by depth so the
 * header ends at its matching `)`.
 *
 * @param {{t: string, v: string}[]} toks - Token list.
 * @returns {{t: string, v: string}[]} New list without statement semicolons.
 */
function dropSemicolons(toks) {
  const isPunct = (tk, ch) => tk.t === T.PUNCT && tk.v === ch;
  const kept = [];
  let headerDepth = 0; // > 0 while inside a for(...) header
  let expectHeaderOpen = false; // previous token was `for`

  for (const tk of toks) {
    if (tk.t === T.IDENT && tk.v === 'for') {
      expectHeaderOpen = true;
      kept.push(tk);
      continue;
    }

    // The flag is only valid for the single token right after `for`.
    const opensHeader = expectHeaderOpen && isPunct(tk, '(');
    expectHeaderOpen = false;
    if (opensHeader) {
      headerDepth = 1;
      kept.push(tk);
      continue;
    }

    if (headerDepth > 0) {
      // Everything in the header survives, semicolons included.
      if (isPunct(tk, '(')) headerDepth += 1;
      else if (isPunct(tk, ')')) headerDepth -= 1;
      kept.push(tk);
      continue;
    }

    if (!isPunct(tk, ';')) kept.push(tk);
  }
  return kept;
}
// ─── Separator decision ───────────────────────────────────────────────────────
// Returns true if a space is required between token a and token b
// to prevent them merging into a different token or statement.
//
// Key rules exploited:
// digit-break: NUMBER → IDENT is an implicit statement boundary, no space needed
// ) → anything: GML treats ) as a statement boundary, no space needed
// } → anything: same
// IDENT → IDENT: ALWAYS needs space (would merge into one identifier)
// IDENT → NUMBER: ALWAYS needs space (x5 is one identifier in GML)
//
/**
 * Decide whether a space must be emitted between two adjacent tokens so that
 * they do not fuse into a different token when written back to back.
 *
 * Besides the identifier/number/operator/string rules, a hex literal needs
 * special care: `$FF` directly followed by an identifier that begins with a
 * hex digit letter (a–f / A–F) would read as a longer hex literal
 * (`$FF` + `a` → `$FFa`), so a space is kept in that case only.
 *
 * @param {{t: string, v: string}} a - Left token.
 * @param {{t: string, v: string}} b - Right token.
 * @returns {boolean} True when a separating space is required.
 */
function needsSpace(a, b) {
  const { t: at, v: av } = a;
  const { t: bt, v: bv } = b;

  // IDENT → IDENT would merge into one identifier; IDENT → NUMBER likewise (x5).
  if (at === T.IDENT && (bt === T.IDENT || bt === T.NUMBER)) return true;

  // Two numbers in a row: unusual, but they would merge into one literal.
  if (at === T.NUMBER && bt === T.NUMBER) return true;

  // Hex literal followed by an identifier that starts with a hex digit.
  if (at === T.NUMBER && bt === T.IDENT && av[0] === '$' && /^[0-9a-fA-F]/.test(bv)) {
    return true;
  }

  // OP → OP: guard against accidentally forming a multi-char operator.
  // Key = last char of the left operator, value = first chars of the right
  // operator that would combine with it (e.g. `!` then `=` → `!=`).
  if (at === T.OP && bt === T.OP) {
    const CANNOT_START = { '!': '=', '+': '+', '-': '-', '<': '<>', '&': '&', '|': '|', '^': '^', '=': '=' };
    const risk = CANNOT_START[av[av.length - 1]];
    if (risk && risk.includes(bv[0])) return true;
  }

  // String followed by string: keep them visibly separate.
  if (at === T.STRING && bt === T.STRING) return true;

  return false;
}
// ─── var / globalvar semicolon fixup ─────────────────────────────────────────
// GML 8.1 requires a statement terminator between a var/globalvar declaration
// and the next statement. `var x x=1` errors; `var x;x=1` is required.
// Run this AFTER dropSemicolons so the injected semicolons survive.
/**
 * Re-insert the `;` that must terminate a `var` / `globalvar` declaration.
 *
 * After the keyword, the declaration list `IDENT (, IDENT)*` is copied
 * through; a `;` token is then appended if at least one name was declared
 * and further tokens follow. Intended to run after dropSemicolons().
 *
 * @param {{t: string, v: string}[]} toks - Token list.
 * @returns {{t: string, v: string}[]} New list with declaration terminators.
 */
function fixVarSemicolons(toks) {
  const isDeclKeyword = (tk) =>
    tk.t === T.IDENT && (tk.v === 'var' || tk.v === 'globalvar');
  const isComma = (tk) => tk !== undefined && tk.t === T.PUNCT && tk.v === ',';

  const result = [];
  let pos = 0;
  while (pos < toks.length) {
    const current = toks[pos++];
    result.push(current);
    if (!isDeclKeyword(current)) continue;

    // Copy the declared names; a comma means another name may follow.
    let names = 0;
    while (pos < toks.length && toks[pos].t === T.IDENT) {
      result.push(toks[pos++]);
      names += 1;
      if (!isComma(toks[pos])) break;
      result.push(toks[pos++]);
    }

    // Terminate the list only when more code comes after it.
    if (names > 0 && pos < toks.length) {
      result.push({ t: T.PUNCT, v: ';' });
    }
  }
  return result;
}
// ─── Main minify pipeline ─────────────────────────────────────────────────────
/**
 * Minify GML 8.1 source: tokenize, optionally alias keywords and trim floats,
 * drop statement semicolons, re-add the ones `var`/`globalvar` need, and join
 * the tokens with a space only where needsSpace() demands one.
 *
 * @param {string} src - GML source text.
 * @param {{alias?: boolean, numbers?: boolean}} [opts] - Pass switches.
 *   `alias` enables keyword alias substitution, `numbers` enables float
 *   trimming. When `opts` is omitted (or null) both passes run; when an
 *   object is given, a missing/falsy field disables that pass.
 * @returns {string} Minified source.
 */
function minify(src, opts) {
  // Omitting opts used to throw on `opts.alias`; default to everything on.
  const { alias, numbers } = opts ?? { alias: true, numbers: true };

  let toks = tokenize(src);
  if (alias) toks = applyAliases(toks);
  if (numbers) toks = trimNumbers(toks);
  // Order matters: the var fixup re-injects semicolons the drop pass removed.
  toks = fixVarSemicolons(dropSemicolons(toks));

  const parts = [];
  toks.forEach((tok, idx) => {
    parts.push(tok.v);
    const next = toks[idx + 1];
    if (next !== undefined && needsSpace(tok, next)) parts.push(' ');
  });
  return parts.join('');
}
// Shorthand bindings for the process members used by the test runner and CLI.
// Declared with const: they are never reassigned.
const { stdout, stderr, argv, exit } = process;
// ─── Built-in sanity checks ───────────────────────────────────────────────────
/**
 * Run the built-in sanity checks: minify each sample with aliasing and
 * number trimming enabled, compare against the expected text, print one
 * PASS/FAIL line per case plus a summary, then terminate the process with
 * status 1 if any case failed and 0 otherwise. Does not return.
 */
function runTests() {
  // [description, input, expected_output]
  const cases = [
    ['digit-break: no space between number→ident', 'a=0\nb=1', 'a=0b=1'],
    ['var decl gets semicolon before next statement', 'var xx\nxx=1', 'var xx;xx=1'],
    ['var multi-decl gets one semicolon at end', 'var a,b,c\na=1', 'var a,b,c;a=1'],
    ['globalvar decl gets semicolon', 'globalvar g\ng=0', 'globalvar g;g=0'],
    ['var at end of script (no trailing semicolon)', 'var x', 'var x'],
    ['comments stripped', '// comment\na=1 /* block */ b=2', 'a=1b=2'],
    ['semicolons dropped', 'a=1;b=2;c=3', 'a=1b=2c=3'],
    ['for semicolons kept', 'for(i=0;i<10;i+=1){a=1}', 'for(i=0;i<10;i+=1){a=1}'],
    ['alias: and→&&, or→||, not→!', 'if a and b or not c{}', 'if a&&b||!c{}'],
    ['alias: begin/end→{/}', 'if x begin a=1 end', 'if x{a=1}'],
    ['alias: true/false→1/0', 'a=true\nb=false', 'a=1b=0'],
    ['mod stays as mod with space', 'a=b mod c', 'a=b mod c'],
    ['float trim', 'x=1.000\ny=1.500', 'x=1y=1.5'],
    ['no space after )', 'foo()if x{}', 'foo()if x{}'],
    ['no space after }', 'if x{a=1}if y{b=2}', 'if x{a=1}if y{b=2}'],
    ['string preserved', 'a="hello world"', 'a="hello world"'],
    ['op collision guard: != preserved', 'if a!=b{}', 'if a!=b{}'],
    ['ident keyword followed by ! operator (no space needed)', 'if !a=0{}', 'if!a=0{}'],
  ];
  const opts = { alias: true, numbers: true };

  let passed = 0;
  cases.forEach(([desc, input, expected]) => {
    const got = minify(input, opts);
    if (got !== expected) {
      stdout.write(` FAIL ${desc}\n`);
      stdout.write(` expected: ${expected}\n`);
      stdout.write(` got: ${got}\n`);
      return;
    }
    passed += 1;
    stdout.write(` PASS ${desc}\n`);
  });

  const failed = cases.length - passed;
  stdout.write(`\n${passed}/${cases.length} tests passed\n`);
  exit(failed > 0 ? 1 : 0);
}
// ─── CLI ──────────────────────────────────────────────────────────────────────
// Work on a copy of the user arguments; process.argv itself stays intact.
const args = argv.slice(2);

if (args.includes('--test')) {
  runTests(); // terminates the process
}

const opts = {
  alias: !args.includes('--no-alias'),
  numbers: !args.includes('--no-numbers'),
};

// Positional arguments: [input path, optional output path].
const files = args.filter((a) => !a.startsWith('--'));
const [inputPath, outputPath] = files;

let src;
if (args.includes('--stdin') || inputPath === undefined) {
  // File descriptor 0 is stdin.
  src = read(0, 'utf8');
} else {
  if (!exists(inputPath)) {
    stderr.write(`gml81-min: file not found: ${inputPath}\n`);
    exit(1);
  }
  src = read(inputPath, 'utf8');
}

const result = minify(src, opts);

if (outputPath) {
  write(outputPath, result, 'utf8');
  // Report real UTF-8 byte sizes (string length counts UTF-16 units), and
  // avoid a NaN percentage when the input is empty.
  const before = Buffer.byteLength(src, 'utf8');
  const after = Buffer.byteLength(result, 'utf8');
  const pct = before === 0 ? '0.0' : ((1 - after / before) * 100).toFixed(1);
  stderr.write(`${before} → ${after} bytes (${pct}% smaller)\n`);
} else {
  stdout.write(result + '\n');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment