Last active
April 28, 2026 00:22
-
-
Save donnaken15/11f0b72f4d1f71c2edf74e9bf5a0c420 to your computer and use it in GitHub Desktop.
gm8.1 gml minifier | claude crap
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // gml81-min.js — GML 8.1 minifier | |
| // Usage: node gml81-min.js <input.gml> [output.gml] | |
| // node gml81-min.js --stdin (reads stdin, writes stdout) | |
| // node gml81-min.js --test (runs built-in sanity checks) | |
| // | |
| // Flags (prepend to input arg): | |
| // --no-alias skip keyword alias substitution (and/or/not/begin/end/true/false) | |
| // --no-numbers skip float trailing-zero trimming | |
| // | |
| // thanks clanker | |
| 'use strict'; | |
| import {'readFileSync' as read,'writeFileSync' as write, 'existsSync' as exists} from 'fs'; | |
| // ─── Token types ────────────────────────────────────────────────────────────── | |
// Single-character tags stored in the `t` field of every token object.
const T = {
  // identifiers and keywords
  IDENT: 'I',
  // numeric literals (decimal, hex $FF)
  NUMBER: 'N',
  // "..." or '...'
  STRING: 'S',
  // operators (single and multi-char)
  OP: 'O',
  // punctuation such as (){}[];,.:
  PUNCT: 'P',
};
| // ─── Tokenizer ──────────────────────────────────────────────────────────────── | |
/**
 * Split GML 8.1 source text into a flat list of `{ t, v }` tokens.
 *
 * Whitespace, `//` line comments and block comments are discarded. Every
 * other character ends up in exactly one token, so joining the `v` fields
 * reproduces the code minus spacing and comments.
 *
 * Behaviour worth knowing:
 *  - Strings run from the opening quote to the next matching quote. GML 8.1
 *    has no escape sequences, so a backslash is an ordinary character and
 *    `"C:\"` is a complete string.
 *  - An unterminated string or block comment simply runs to end of input.
 *  - A `#define <name>` script header is emitted as ONE PUNCT token holding
 *    the whole line plus the newlines that must surround it, because the
 *    header has to stay on a line of its own.
 *  - Characters that match no rule are kept as PUNCT so nothing is lost.
 *
 * @param {string} src - GML source text.
 * @returns {{t: string, v: string}[]} tokens in source order.
 */
function tokenize(src) {
  const TWO_CHAR_OPS = [
    '==', '!=', '<>', '<=', '>=', '&&', '||', '^^',
    '+=', '-=', '*=', '/=', '|=', '&=', '^=', '<<', '>>',
  ];
  const ONE_CHAR_OPS = '+-*/%=<>!&|^~';
  // `undefined` (reading past the end) compares false, so no bounds check is needed.
  const isDigit = (ch) => ch >= '0' && ch <= '9';

  const toks = [];
  const len = src.length;
  let i = 0;

  while (i < len) {
    const c = src[i];

    // whitespace — discard
    if (c === ' ' || c === '\t' || c === '\r' || c === '\n') {
      i++;
      continue;
    }

    // line comment — discard up to (not including) the newline
    if (c === '/' && src[i + 1] === '/') {
      while (i < len && src[i] !== '\n') i++;
      continue;
    }

    // block comment — discard, including the closing */
    if (c === '/' && src[i + 1] === '*') {
      i += 2;
      while (i < len - 1 && !(src[i] === '*' && src[i + 1] === '/')) i++;
      i += 2;
      continue;
    }

    // #define script header — keep the whole line verbatim on its own line
    if (c === '#' && src.startsWith('#define', i) && !/[a-zA-Z0-9_]/.test(src[i + 7] ?? ' ')) {
      let j = i;
      while (j < len && src[j] !== '\n' && src[j] !== '\r') j++;
      const header = src.slice(i, j).trimEnd();
      // A leading newline is only needed when something precedes the header.
      toks.push({ t: T.PUNCT, v: `${toks.length > 0 ? '\n' : ''}${header}\n` });
      i = j;
      continue;
    }

    // string literal, either quote style; no escape processing (see above)
    if (c === '"' || c === "'") {
      let j = i + 1;
      while (j < len && src[j] !== c) j++;
      toks.push({ t: T.STRING, v: src.slice(i, j + 1) });
      i = j + 1;
      continue;
    }

    // hex literal $FFFF
    if (c === '$') {
      let j = i + 1;
      while (j < len && /[0-9a-fA-F]/.test(src[j])) j++;
      toks.push({ t: T.NUMBER, v: src.slice(i, j) });
      i = j;
      continue;
    }

    // decimal / float literal; the dot is only consumed when a digit follows it
    if (isDigit(c)) {
      let j = i;
      while (j < len && isDigit(src[j])) j++;
      if (src[j] === '.' && isDigit(src[j + 1])) {
        j++;
        while (j < len && isDigit(src[j])) j++;
      }
      toks.push({ t: T.NUMBER, v: src.slice(i, j) });
      i = j;
      continue;
    }

    // identifier / keyword
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c === '_') {
      let j = i;
      while (j < len && /[a-zA-Z0-9_]/.test(src[j])) j++;
      toks.push({ t: T.IDENT, v: src.slice(i, j) });
      i = j;
      continue;
    }

    // two-char operators (must be checked before single-char ones)
    const two = src.slice(i, i + 2);
    if (TWO_CHAR_OPS.includes(two)) {
      toks.push({ t: T.OP, v: two });
      i += 2;
      continue;
    }

    // single-char operators
    if (ONE_CHAR_OPS.includes(c)) {
      toks.push({ t: T.OP, v: c });
      i++;
      continue;
    }

    // punctuation, plus any unrecognised character so it doesn't disappear
    toks.push({ t: T.PUNCT, v: c });
    i++;
  }

  return toks;
}
| // ─── Keyword alias substitution ─────────────────────────────────────────────── | |
| // These are semantically equivalent shorter forms in GML 8.1. | |
| // true/false → 1/0 saves chars; begin/end → {/} saves chars. | |
| // and/or/not → &&/||/! is never longer, and operators need no surrounding spaces | |
// Keyword → replacement-token table used by applyAliases.
// Each row is [keyword, replacement token type, replacement text].
const ALIASES = Object.fromEntries(
  [
    ['and', T.OP, '&&'],
    ['or', T.OP, '||'],
    ['not', T.OP, '!'],
    ['begin', T.PUNCT, '{'],
    ['end', T.PUNCT, '}'],
    ['true', T.NUMBER, '1'],
    ['false', T.NUMBER, '0'],
  ].map(([keyword, t, v]) => [keyword, { t, v }]),
);
/**
 * Replace aliasable keyword tokens with their shorter equivalents.
 *
 * Only IDENT tokens whose text is an OWN key of ALIASES are replaced. The
 * own-key check matters: a plain `ALIASES[name]` lookup also finds inherited
 * members such as `constructor` or `toString`, which would swap a perfectly
 * ordinary identifier for a function object.
 *
 * @param {{t: string, v: string}[]} toks - token list; not mutated.
 * @returns {{t: string, v: string}[]} new list; untouched tokens are reused as-is.
 */
const applyAliases = (toks) =>
  toks.map((tok) =>
    (tok.t === T.IDENT && Object.hasOwn(ALIASES, tok.v) ? ALIASES[tok.v] : tok));
| // ─── Float trimming: 1.00000 → 1, 1.50000 → 1.5 ───────────────────────────── | |
/**
 * Shorten float literals by removing redundant trailing zeros.
 *
 * Only NUMBER tokens containing a '.' are rewritten: zeros at the end of the
 * fractional part are dropped, and the dot goes too if nothing is left after
 * it (1.500 → 1.5, 1.000 → 1). All other tokens pass through untouched.
 *
 * @param {{t: string, v: string}[]} toks - token list; not mutated.
 * @returns {{t: string, v: string}[]} new token list.
 */
function trimNumbers(toks) {
  // First strip the trailing zeros, then a dot left dangling at the end.
  const shorten = (text) => text.replace(/(\.\d*?)0+$/, '$1').replace(/\.$/, '');

  const result = [];
  for (const tok of toks) {
    const isFloat = tok.t === T.NUMBER && tok.v.includes('.');
    result.push(isFloat ? { t: T.NUMBER, v: shorten(tok.v) } : tok);
  }
  return result;
}
| // ─── Semicolon removal ──────────────────────────────────────────────────────── | |
| // Drop semicolons everywhere EXCEPT inside for() header parens, | |
| // where they separate init; cond; step. | |
| // This pass does not special-case #define lines; their tokens are treated like any others. | |
/**
 * Remove every ';' token except those inside a for(...) header.
 *
 * A for header starts at the '(' that immediately follows a `for` identifier
 * and ends when its parentheses balance again; every token in between is kept
 * as-is, semicolons included.
 *
 * @param {{t: string, v: string}[]} toks - token list; not mutated.
 * @returns {{t: string, v: string}[]} new list without the droppable semicolons.
 */
function dropSemicolons(toks) {
  const isPunct = (tok, ch) => tok.t === T.PUNCT && tok.v === ch;

  const kept = [];
  let headerDepth = 0; // >0 while inside a for header
  let expectHeaderOpen = false; // true only for the token right after `for`

  for (const tok of toks) {
    if (tok.t === T.IDENT && tok.v === 'for') {
      expectHeaderOpen = true;
      kept.push(tok);
      continue;
    }

    // The flag lasts for exactly one token, whether or not that token is '('.
    const opensHeader = expectHeaderOpen && isPunct(tok, '(');
    expectHeaderOpen = false;
    if (opensHeader) {
      headerDepth = 1;
      kept.push(tok);
      continue;
    }

    if (headerDepth > 0) {
      // Nested parentheses (e.g. calls in the condition) must not end the header early.
      if (isPunct(tok, '(')) headerDepth += 1;
      else if (isPunct(tok, ')')) headerDepth -= 1;
      kept.push(tok);
      continue;
    }

    if (!isPunct(tok, ';')) kept.push(tok);
  }

  return kept;
}
| // ─── Separator decision ─────────────────────────────────────────────────────── | |
| // Returns true if a space is required between token a and token b | |
| // to prevent them merging into a different token or statement. | |
| // | |
| // Key rules exploited: | |
| // digit-break: NUMBER → IDENT is an implicit statement boundary, no space needed | |
| // ) → anything: GML treats ) as a statement boundary, no space needed | |
| // } → anything: same | |
| // IDENT → IDENT: ALWAYS needs space (would merge into one identifier) | |
| // IDENT → NUMBER: ALWAYS needs space (x5 is one identifier in GML) | |
| // | |
/**
 * Decide whether a space must be emitted between two adjacent tokens so that
 * they do not fuse into a different token when written back to back.
 *
 * A space is required for:
 *  - IDENT → IDENT and IDENT → NUMBER (they would read as one identifier),
 *  - NUMBER → NUMBER,
 *  - a hex NUMBER ($...) → IDENT starting with a-f/A-F, because those letters
 *    would be read as further hex digits (`$FF` + `b` → `$FFb`),
 *  - OP → OP pairs whose touching characters could form another operator,
 *  - STRING → STRING.
 * Every other pair is written without a separator.
 *
 * @param {{t: string, v: string}} a - the token written first.
 * @param {{t: string, v: string}} b - the token written next.
 * @returns {boolean} true when a space has to be inserted between a and b.
 */
function needsSpace(a, b) {
  const { t: at, v: av } = a;
  const { t: bt, v: bv } = b;

  // An identifier followed by another identifier or a number would merge (x5 is one name).
  if (at === T.IDENT && (bt === T.IDENT || bt === T.NUMBER)) return true;

  // Two numbers in a row: unusual, but they must not merge.
  if (at === T.NUMBER && bt === T.NUMBER) return true;

  // Hex literal followed by an identifier that begins with a hex letter.
  // Decimal literals are safe here: no identifier character extends them.
  if (at === T.NUMBER && bt === T.IDENT && av[0] === '$' && /^[a-fA-F]/.test(bv)) return true;

  // OP → OP: key = last char of the left operator, value = first chars of the
  // right operator that must not touch it (e.g. `!` then `=` would read as `!=`).
  const CANNOT_START = { '!': '=', '+': '+', '-': '-', '<': '<>', '&': '&', '|': '|', '^': '^', '=': '=' };
  if (at === T.OP && bt === T.OP) {
    const risk = CANNOT_START[av[av.length - 1]];
    if (risk && risk.includes(bv[0])) return true;
  }

  // String followed by string: unusual, but they would smash together.
  if (at === T.STRING && bt === T.STRING) return true;

  return false;
}
| // ─── var / globalvar semicolon fixup ───────────────────────────────────────── | |
| // GML 8.1 requires a statement terminator between a var/globalvar declaration | |
| // and the next statement. `var x x=1` errors; `var x;x=1` is required. | |
| // Run this AFTER dropSemicolons so the injected semicolons survive. | |
/**
 * Insert a ';' after each var/globalvar declaration list.
 *
 * After a `var` or `globalvar` identifier, the pass copies a list of the form
 * IDENT (',' IDENT)* and, when at least one name was copied and more tokens
 * follow, appends a ';' PUNCT token. All other tokens are copied unchanged.
 *
 * @param {{t: string, v: string}[]} toks - token list; not mutated.
 * @returns {{t: string, v: string}[]} new list with the terminators injected.
 */
function fixVarSemicolons(toks) {
  const isDeclKeyword = (tok) =>
    tok.t === T.IDENT && (tok.v === 'var' || tok.v === 'globalvar');
  const isComma = (tok) => tok !== undefined && tok.t === T.PUNCT && tok.v === ',';

  const result = [];
  let pos = 0;

  while (pos < toks.length) {
    const head = toks[pos++];
    result.push(head);
    if (!isDeclKeyword(head)) continue;

    // Copy the declared names; a name not followed by a comma ends the list.
    let names = 0;
    while (pos < toks.length && toks[pos].t === T.IDENT) {
      result.push(toks[pos++]);
      names += 1;
      if (!isComma(toks[pos])) break;
      result.push(toks[pos++]); // keep the comma
    }

    // No terminator is needed when the declaration is the last thing in the input.
    if (names > 0 && pos < toks.length) {
      result.push({ t: T.PUNCT, v: ';' });
    }
  }

  return result;
}
| // ─── Main minify pipeline ───────────────────────────────────────────────────── | |
/**
 * Run the full minification pipeline over GML source text.
 *
 * Order of passes: tokenize → (optional) keyword aliases → (optional) float
 * trimming → semicolon removal → var/globalvar semicolon re-injection. The
 * surviving tokens are then concatenated, with a single space wherever
 * needsSpace() says two neighbours would otherwise fuse.
 *
 * @param {string} src - GML source text.
 * @param {{alias: boolean, numbers: boolean}} opts - which optional passes to run.
 * @returns {string} minified source.
 */
function minify(src, opts) {
  let toks = tokenize(src);
  if (opts.alias) toks = applyAliases(toks);
  if (opts.numbers) toks = trimNumbers(toks);
  // Semicolons are re-injected after var/globalvar lists once the rest are gone.
  toks = fixVarSemicolons(dropSemicolons(toks));

  return toks.reduce((out, tok, idx) => {
    const next = toks[idx + 1];
    const gap = next && needsSpace(tok, next) ? ' ' : '';
    return out + tok.v + gap;
  }, '');
}
// Local shorthands for the process members used by the test runner and the CLI below.
// Declared with `const` like every other binding in this file; `argv` is still
// mutated in place later (splice), which `const` allows.
const { stdout, stderr, argv, exit } = process;
| // ─── Built-in sanity checks ─────────────────────────────────────────────────── | |
/**
 * Run the built-in sanity checks and terminate the process.
 *
 * Each case is minified with both optional passes enabled and compared with
 * its expected output. A PASS/FAIL line is written to stdout per case (plus
 * the expected and actual text on failure), followed by a summary line. The
 * function finishes by calling exit() with 1 if any case failed, else 0.
 */
function runTests() {
  const opts = { alias: true, numbers: true };

  // Each entry: [description, input, expected_output]
  const cases = [
    ['digit-break: no space between number→ident', 'a=0\nb=1', 'a=0b=1'],
    ['var decl gets semicolon before next statement', 'var xx\nxx=1', 'var xx;xx=1'],
    ['var multi-decl gets one semicolon at end', 'var a,b,c\na=1', 'var a,b,c;a=1'],
    ['globalvar decl gets semicolon', 'globalvar g\ng=0', 'globalvar g;g=0'],
    ['var at end of script (no trailing semicolon)', 'var x', 'var x'],
    ['comments stripped', '// comment\na=1 /* block */ b=2', 'a=1b=2'],
    ['semicolons dropped', 'a=1;b=2;c=3', 'a=1b=2c=3'],
    ['for semicolons kept', 'for(i=0;i<10;i+=1){a=1}', 'for(i=0;i<10;i+=1){a=1}'],
    ['alias: and→&&, or→||, not→!', 'if a and b or not c{}', 'if a&&b||!c{}'],
    ['alias: begin/end→{/}', 'if x begin a=1 end', 'if x{a=1}'],
    ['alias: true/false→1/0', 'a=true\nb=false', 'a=1b=0'],
    ['mod stays as mod with space', 'a=b mod c', 'a=b mod c'],
    ['float trim', 'x=1.000\ny=1.500', 'x=1y=1.5'],
    ['no space after )', 'foo()if x{}', 'foo()if x{}'],
    ['no space after }', 'if x{a=1}if y{b=2}', 'if x{a=1}if y{b=2}'],
    ['string preserved', 'a="hello world"', 'a="hello world"'],
    ['op collision guard: != preserved', 'if a!=b{}', 'if a!=b{}'],
    ['ident keyword followed by ! operator (no space needed)', 'if !a=0{}', 'if!a=0{}'],
  ];

  let passed = 0;
  cases.forEach(([desc, input, expected]) => {
    const got = minify(input, opts);
    if (got !== expected) {
      stdout.write(` FAIL ${desc}\n`);
      stdout.write(` expected: ${expected}\n`);
      stdout.write(` got: ${got}\n`);
      return;
    }
    stdout.write(` PASS ${desc}\n`);
    passed += 1;
  });

  const failed = cases.length - passed;
  stdout.write(`\n${passed}/${cases.length} tests passed\n`);
  exit(failed > 0 ? 1 : 0);
}
| // ─── CLI ────────────────────────────────────────────────────────────────────── | |
// Command-line entry point: parse flags, read the input, minify, write the result.

// Drop the first two argv entries so only user-supplied arguments remain.
argv.splice(0, 2);

if (argv.includes('--test')) {
  runTests();
  // runTests() finishes by calling exit(), so control is not expected to get here.
}

// Both optional passes are on unless explicitly disabled.
const opts = {
  alias: !argv.includes('--no-alias'),
  numbers: !argv.includes('--no-numbers'),
};

// Everything that is not a --flag is a path: [input, output].
const files = argv.filter((a) => !a.startsWith('--'));

let src;
if (argv.includes('--stdin') || files.length === 0) {
  // File descriptor 0 is stdin.
  src = read(0, 'utf8');
} else {
  if (!exists(files[0])) {
    stderr.write(`gml81-min: file not found: ${files[0]}\n`);
    exit(1);
  }
  src = read(files[0], 'utf8');
}

const result = minify(src, opts);

if (files[1]) {
  write(files[1], result, 'utf8');
  // Report real UTF-8 byte counts; string .length counts UTF-16 units, which
  // differs from the file size as soon as the source contains non-ASCII text.
  const before = Buffer.byteLength(src, 'utf8');
  const after = Buffer.byteLength(result, 'utf8');
  // An empty input would otherwise divide by zero and print "NaN% smaller".
  const pct = before === 0 ? '0.0' : ((1 - after / before) * 100).toFixed(1);
  stderr.write(`${before} → ${after} bytes (${pct}% smaller)\n`);
} else {
  stdout.write(`${result}\n`);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment