Created
January 3, 2026 00:13
-
-
Save factubsio/e29335014b7ee15de69e3b857fa568e4 to your computer and use it in GitHub Desktop.
bubble lang parser at some version
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import { isFeatureDefNode, isPropertyNode } from './ast-utils'; | |
| import { | |
| Token, | |
| TokenType, | |
| AstNode, | |
| BlueprintNode, | |
| PropertyNode, | |
| PrimitiveLiteralNode, | |
| AstKind, | |
| BodyStatement, | |
| InvocationNode, | |
| FileNode, | |
| ModifyNode, | |
| ModifyStatementNode, | |
| ModifyClassGainsLoses, | |
| ReferenceNode, | |
| ObjectLiteralNode, | |
| ValueNode, | |
| FeatureNode, | |
| ListLiteralNode | |
| } from './syntax'; | |
/** A parse diagnostic anchored to the token at which it was detected. */
export interface ParserError {
  /** Human-readable description of what went wrong. */
  message: string;
  /** Optional machine-readable error code. */
  code?: string;
  /** The token at which the error was reported. */
  token: Token;
}
/**
 * Outcome of a single parsing step: either a success (non-null value,
 * null error) or a failure (an error, possibly alongside a partial value
 * usable for error recovery).
 */
type ParseResult<T> = {
  value: T;
  error: null;
} | {
  value: T | null; // Can hold a partial result on error
  error: ParserError;
};
| function ok<T>(value: T): ParseResult<T> { | |
| return { value, error: null }; | |
| } | |
| export class Parser { | |
| readonly tokens: Token[]; | |
| current = 0; | |
| errors: ParserError[] = []; | |
| constructor(tokens: Token[]) { | |
| this.tokens = tokens.filter(t => t.type !== TokenType.Unknown); | |
| } | |
| bad<T>(token: Token, message: string, partial: T | null = null): ParseResult<T> { | |
| const error: ParserError = { message, token }; | |
| this.errors.push(error); | |
| return { value: partial, error }; | |
| } | |
| castBad<T, R>(inner: ParseResult<T>): ParseResult<R> { | |
| return { error: inner.error!, value: null }; | |
| } | |
| public parse(path: string): { ast: FileNode | null, errors: ParserError[] } { | |
| try { | |
| if (this.isAtEnd()) { | |
| return { ast: null, errors: this.errors }; | |
| } | |
| const file: FileNode = { | |
| tokens: this.tokens, | |
| path, | |
| kind: 'File', | |
| blueprints: [], | |
| modifies: [], | |
| startToken: this.tokens[0], | |
| endToken: this.tokens[this.tokens.length - 1], | |
| }; | |
| while (!this.isAtEnd()) { | |
| const next = this.peek(); | |
| if (next.lexeme === 'blueprint') { | |
| const blueprint = this.parseBlueprint(); | |
| blueprint.parent = file; | |
| file.blueprints.push(blueprint); | |
| } else if (next.lexeme === 'modify') { | |
| const modify = this.parseModify(); | |
| modify.parent = file; | |
| file.modifies.push(modify); | |
| } else { | |
| this.errors.push({ message: 'unexpected keyword', token: next }); | |
| this.synchronize(); | |
| this.advance(); | |
| } | |
| } | |
| return { ast: file, errors: this.errors }; | |
| } catch (error) { | |
| return { ast: null, errors: this.errors }; | |
| } | |
| } | |
| parseModify(): ModifyNode { | |
| const startToken = this.peek(); | |
| const partialNode: Partial<ModifyNode> & { kind: 'Modify', startToken: Token } = { | |
| kind: AstKind.Modify, | |
| startToken: startToken, | |
| statements: [], | |
| }; | |
| partialNode.lead = startToken; | |
| const keyworkdRes = this.consumeKeyword('modify', "Expected 'modify' keyword to start a definition."); | |
| if (keyworkdRes.error) { | |
| this.synchronize(); | |
| return { ...partialNode, endToken: this.previous() } as ModifyNode; | |
| } | |
| const targetResult = this.consume(TokenType.Identifier, "Expected a name for the target blueprint."); | |
| if (targetResult.error) { | |
| this.synchronize(); | |
| return { ...partialNode, endToken: this.previous() } as ModifyNode; | |
| } | |
| partialNode.target = { startToken: targetResult.value, endToken: targetResult.value, kind: 'Reference', value: targetResult.value.lexeme }; | |
| const lBraceResult = this.consume(TokenType.LeftBrace, "Expected '{' to begin the modify block."); | |
| if (lBraceResult.error) { | |
| return { ...partialNode, endToken: this.previous() } as ModifyNode; | |
| } | |
| while (!this.check(TokenType.RightBrace) && !this.isAtEnd()) { | |
| const stmt = this.parseModifyStatement(); | |
| if (stmt.value) { | |
| partialNode.statements!.push(stmt.value); | |
| } | |
| if (stmt.error) { | |
| this.synchronize(); | |
| } | |
| } | |
| const endToken = this.consume(TokenType.RightBrace, "Expected '}' to close the modify block.").value ?? this.previous(); | |
| partialNode.endToken = endToken; | |
| return partialNode as ModifyNode; | |
| } | |
| parseBlueprint(): BlueprintNode { | |
| const startToken = this.peek(); | |
| const partialNode: Partial<BlueprintNode> & { kind: 'Blueprint', startToken: Token } = { | |
| kind: AstKind.Blueprint, | |
| startToken: startToken, | |
| props: [], | |
| stmts: [], | |
| featureDefs: [], | |
| }; | |
| partialNode.lead = startToken; | |
| const blueprintKeywordResult = this.consumeKeyword('blueprint', "Expected 'blueprint' keyword to start a definition."); | |
| if (blueprintKeywordResult.error) { | |
| this.synchronize(); | |
| return { ...partialNode, endToken: this.previous() } as BlueprintNode; | |
| } | |
| const nameResult = this.consume(TokenType.Identifier, "Expected a name for the blueprint."); | |
| partialNode.name = nameResult.value ?? undefined; | |
| if (nameResult.error) { | |
| this.synchronize(); | |
| return { ...partialNode, endToken: this.previous() } as BlueprintNode; | |
| } | |
| const extendsResult = this.consumeKeyword('extends', "Expected 'extends' keyword after the blueprint name."); | |
| if (extendsResult.error) { | |
| this.synchronize(); | |
| return { ...partialNode, endToken: this.previous() } as BlueprintNode; | |
| } | |
| partialNode.extends = this.previous(); | |
| const baseTypeResult = this.consume(TokenType.Identifier, "Expected a base type for the blueprint."); | |
| partialNode.baseType = baseTypeResult.value ?? undefined; | |
| if (baseTypeResult.error) { | |
| this.synchronize(); | |
| return { ...partialNode, endToken: this.previous() } as BlueprintNode; | |
| } | |
| const lBraceResult = this.consume(TokenType.LeftBrace, "Expected '{' to begin the blueprint body."); | |
| if (lBraceResult.error) { | |
| return { ...partialNode, endToken: this.previous() } as BlueprintNode; | |
| } | |
| while (!this.check(TokenType.RightBrace) && !this.isAtEnd()) { | |
| const stmt = this.parseBodyStatement(); | |
| if (stmt.value) { | |
| if (isPropertyNode(stmt.value)) | |
| partialNode.props?.push(stmt.value); | |
| else if (isFeatureDefNode(stmt.value)) | |
| partialNode.featureDefs?.push(stmt.value); | |
| else | |
| partialNode.stmts?.push(stmt.value); | |
| } | |
| if (stmt.error) { | |
| this.synchronize(); | |
| } | |
| } | |
| const endToken = this.consume(TokenType.RightBrace, "Expected '}' to close the blueprint body.").value ?? this.previous(); | |
| partialNode.endToken = endToken; | |
| return partialNode as BlueprintNode; | |
| } | |
| parseModifyStatement(): ParseResult<ModifyStatementNode> { | |
| const token = this.peek(); | |
| if (token.type === TokenType.Identifier && (token.lexeme === 'gains' || token.lexeme === 'loses')) { | |
| return this.parseGainsLoses(); | |
| } | |
| return this.bad<ModifyStatementNode>(token, "Expected a statement (e.g. 'gains', 'loses', etc."); | |
| } | |
| parseBodyStatement(): ParseResult<BodyStatement> { | |
| const token = this.peek(); | |
| if (token.type === TokenType.Keyword && (token.lexeme === 'add' || token.lexeme === 'run')) { | |
| return this.parseInvocation(); | |
| } else if (token.type === TokenType.Keyword && token.lexeme === 'feature') { | |
| return this.parseFeatureDef(); | |
| } else if (token.type === TokenType.Identifier) { | |
| return this.parseProperty(); | |
| } | |
| return this.bad<BodyStatement>(token, "Expected a statement (e.g., a property, 'add', or 'run')."); | |
| } | |
| parseFeatureDef() { | |
| this.advance(); | |
| const feature = this.parseFragment<FeatureNode>(this.featureRule); | |
| return feature; | |
| } | |
| parseInvocation(): ParseResult<InvocationNode> { | |
| const keyword = this.advance(); | |
| const invocation: InvocationNode = { | |
| kind: 'Invocation', | |
| arguments: [], | |
| body: null, | |
| keyword, | |
| typeName: this.synthesizeToken(keyword, ''), | |
| startToken: keyword, | |
| endToken: keyword, | |
| }; | |
| const typeName = this.consume(TokenType.Identifier, `Expected a type name after '${keyword.lexeme}'.`); | |
| if (typeName.error) { | |
| return { error: typeName.error, value: invocation }; | |
| } | |
| invocation.typeName = typeName.value; | |
| if (this.match(TokenType.LeftParen)) { | |
| if (!this.check(TokenType.RightParen)) { | |
| do { | |
| const arg = this.parseValue(); | |
| if (arg === null) { | |
| this.errors.push({ token: this.peek(), message: "Expected an argument (e.g., a number or string)." }); | |
| break; | |
| } | |
| if (arg.kind === AstKind.Literal) { | |
| invocation.arguments.push(arg as PrimitiveLiteralNode); | |
| } else { | |
| this.errors.push({ token: arg?.startToken, message: "Complex values are not allowed as terse arguments." }); | |
| break; | |
| } | |
| } while (this.match(TokenType.Comma)); | |
| } | |
| const rightParen = this.consume(TokenType.RightParen, "Expected ')' to close argument list."); | |
| if (rightParen.error) { | |
| return { error: rightParen.error, value: invocation }; | |
| } | |
| } | |
| if (this.check(TokenType.LeftBrace)) { | |
| const body = this.parseObjectLiteral(); | |
| invocation.body = body.value; | |
| if (body.value?.endToken) { | |
| invocation.endToken = body.value.endToken; | |
| } | |
| if (body.error) { | |
| return { error: body.error, value: invocation }; | |
| } | |
| } | |
| return ok(invocation); | |
| } | |
| // Enters with current token pre-checked as '[' | |
| parseListLiteral(): ParseResult<ListLiteralNode> { | |
| const obj: ListLiteralNode = { | |
| startToken: this.peek(), | |
| endToken: this.peek(), | |
| kind: 'LiteralList', | |
| elements: [], | |
| }; | |
| this.advance(); | |
| while (!this.check(TokenType.RightBracket) && !this.isAtEnd()) { | |
| var value = this.parseValue(); | |
| if (!value) { | |
| this.synchronize(); | |
| return this.bad(this.previous(), 'could not parse list item', obj); | |
| } | |
| obj.elements.push(value); | |
| } | |
| const end = this.consume(TokenType.RightBracket, "Expected ']'."); | |
| if (end.error) { | |
| return { error: end.error, value: obj }; | |
| } | |
| if (end.value) { | |
| obj.endToken = end.value; | |
| } | |
| return ok(obj); | |
| } | |
| // Enters with current token pre-checked as '{' | |
| parseObjectLiteral(): ParseResult<ObjectLiteralNode> { | |
| const obj: ObjectLiteralNode = { | |
| startToken: this.peek(), | |
| endToken: this.peek(), | |
| kind: 'LiteralObject', | |
| value: [], | |
| }; | |
| this.advance(); | |
| while (!this.check(TokenType.RightBrace) && !this.isAtEnd()) { | |
| const prop = this.parseProperty(); | |
| if (prop.value) { | |
| obj.value.push(prop.value); | |
| obj.endToken = prop.value.endToken; | |
| } | |
| if (prop.error) { | |
| this.synchronize(); | |
| } | |
| } | |
| const end = this.consume(TokenType.RightBrace, "Expected '}'."); | |
| if (end.error) { | |
| return { error: end.error, value: obj }; | |
| } | |
| if (end.value) { | |
| obj.endToken = end.value; | |
| } | |
| return ok(obj); | |
| } | |
| synthesizeToken(after: Token, value: string): Token { | |
| return { | |
| type: 'Unknown', | |
| column: after.column + after.text.length + 1, | |
| line: after.line, | |
| startOffset: after.endOffset + 1, | |
| endOffset: after.endOffset + 1, | |
| idx: -1, | |
| text: value, | |
| lexeme: value, | |
| }; | |
| } | |
| parseGainsLoses(): ParseResult<ModifyClassGainsLoses> { | |
| let val = this.parseFragment<ModifyClassGainsLoses>(this.gainLossRule); | |
| if (val.value?.startToken) { | |
| val.value.kind = val.value.startToken.lexeme === 'gains' ? 'ModifyGains' : 'ModifyLoses'; | |
| } | |
| return val; | |
| } | |
| parseReference(): ParseResult<ReferenceNode> { | |
| const token = this.peek(); | |
| this.advance(); | |
| if (token.type !== TokenType.Identifier) { | |
| return this.bad<ReferenceNode>(token, 'expected a reference to a blueprint'); | |
| } | |
| return ok({ | |
| startToken: token, | |
| endToken: token, | |
| kind: 'Reference', | |
| value: token.lexeme, | |
| }); | |
| } | |
| parseProperty(): ParseResult<PropertyNode> { | |
| const key = this.consume(TokenType.Identifier, "Expected a property name."); | |
| if (key.error) { | |
| return this.castBad<Token, PropertyNode>(key); | |
| } | |
| const colon = this.consume(TokenType.Colon, `Expected ':' after property name '${key.value.lexeme}'.`); | |
| if (colon.error) { | |
| return this.castBad<Token, PropertyNode>(key); | |
| } | |
| const value = this.parseValue(); | |
| const prop: PropertyNode = { | |
| kind: AstKind.Property, | |
| key: key.value, | |
| colon: colon.value, | |
| value, | |
| startToken: key.value, | |
| endToken: value ? value.endToken : colon.value | |
| }; | |
| return ok(prop); | |
| } | |
| parseValue(): ValueNode | null { | |
| if (this.match(TokenType.Number, TokenType.String, TokenType.Identifier)) { | |
| return this.buildLiteralNode(this.previous()); | |
| } else if (this.peek().type == TokenType.LeftBrace) { | |
| const obj = this.parseObjectLiteral(); | |
| if (obj.error) { | |
| return null; | |
| } else { | |
| return obj.value; | |
| } | |
| } else if (this.peek().type == TokenType.LeftBracket) { | |
| const obj = this.parseListLiteral(); | |
| if (obj.error) { | |
| return null; | |
| } else { | |
| return obj.value; | |
| } | |
| } | |
| return null; | |
| } | |
| buildLiteralNode(token: Token): PrimitiveLiteralNode { | |
| let value: string | number = token.lexeme; | |
| if (token.type === TokenType.Number) { | |
| value = parseFloat(token.lexeme); | |
| } | |
| return { | |
| kind: AstKind.Literal, | |
| value, | |
| token, | |
| startToken: token, | |
| endToken: token | |
| }; | |
| } | |
| consume(type: TokenType, message: string): ParseResult<Token> { | |
| if (this.check(type)) return ok(this.advance()); | |
| return this.bad<Token>(this.peek(), message); | |
| } | |
| consumeKeyword(lexeme: string, message: string): ParseResult<Token> { | |
| const token = this.peek(); | |
| if (token.type === TokenType.Keyword && token.lexeme === lexeme) { | |
| return ok(this.advance()); | |
| } | |
| return this.bad(token, message); | |
| } | |
| match(...types: TokenType[]): boolean { | |
| for (const type of types) { | |
| if (this.check(type)) { | |
| this.advance(); | |
| return true; | |
| } | |
| } | |
| return false; | |
| } | |
| check(type: TokenType): boolean { | |
| if (this.isAtEnd()) return false; | |
| return this.peek().type === type; | |
| } | |
| advance(): Token { | |
| if (!this.isAtEnd()) this.current++; | |
| return this.previous(); | |
| } | |
| isAtEnd(): boolean { | |
| return this.peek().type === TokenType.EOF; | |
| } | |
| peek(): Token { | |
| return this.tokens[this.current]; | |
| } | |
| previous(): Token { | |
| return this.tokens[this.current - 1]; | |
| } | |
| synchronize(): void { | |
| this.advance(); | |
| while (!this.isAtEnd()) { | |
| // A closing brace probably ends the current block? | |
| if (this.previous().type === TokenType.RightBrace) return; | |
| // Keywords probbaly new statement | |
| switch (this.peek().type) { | |
| case TokenType.Keyword: | |
| return; | |
| } | |
| // identifier = property? | |
| if (this.peek().type === TokenType.Identifier && this.peek().line > this.previous().line) { | |
| return; | |
| } | |
| this.advance(); | |
| } | |
| } | |
| parseFragment<T extends AstNode>(steps: RuleStep[]): ParseResult<T> { | |
| const result: Partial<T> = {}; | |
| result.startToken = this.peek(); | |
| result.endToken = this.peek(); | |
| for (const step of steps) { | |
| switch (step.type) { | |
| case 'choice': { | |
| const token = this.peek(); | |
| if (!step.idents.includes(token.lexeme)) { | |
| return this.bad(token, `Expected one of: ${step.idents.join(', ')}`); | |
| } | |
| (result as any)[step.captureAs] = token.lexeme; | |
| result.endToken = this.peek(); | |
| this.advance(); | |
| break; | |
| } | |
| case 'keyword': { | |
| const tokenResult = this.consume(TokenType.Identifier, `Expected keyword "${step.lexeme}"`); | |
| if (tokenResult.error || tokenResult.value.lexeme !== step.lexeme) { | |
| return this.bad(this.previous(), `Expected keyword "${step.lexeme}"`); | |
| } | |
| result.endToken = this.previous(); | |
| break; | |
| } | |
| case 'sub-rule': { | |
| const subResult = step.execRule(this); | |
| if (subResult.error) return { error: subResult.error, value: result as T }; // Propagate error | |
| (result as any)[step.captureAs] = subResult.value; | |
| result.endToken = this.previous(); | |
| break; | |
| } | |
| case 'value': { | |
| const itemResult = this.consume(step.valueType, `Expected ${TokenType[step.valueType]}`); | |
| if (itemResult.error) return { error: itemResult.error, value: result as T }; | |
| (result as any)[step.captureAs] = itemResult.value; | |
| result.endToken = this.previous(); | |
| break; | |
| } | |
| case 'list': { | |
| const items = []; | |
| while (true) { | |
| const itemResult = this.consume(step.itemType, `Expected ${TokenType[step.itemType]}`); | |
| if (itemResult.error) return { error: itemResult.error, value: result as T }; | |
| items.push(itemResult.value.lexeme); | |
| result.endToken = this.previous(); | |
| if (this.peek().type !== step.separator) break; | |
| this.advance(); // Consume the separator | |
| } | |
| (result as any)[step.captureAs] = items; | |
| break; | |
| } | |
| } | |
| } | |
| return ok(result as T); | |
| } | |
| gainLossRuleAll = compileGrammar('gains|loses $(feature:ref) at "all"'); | |
| gainLossRule = compileGrammar('gains|loses $(feature:ref) at [levels:int]'); | |
| featureRule = compileGrammar('requires class $(requiresClass:ref) level:int'); | |
| } | |
// Maps sub-rule names used in grammar strings (e.g. "$(feature:ref)")
// to the Parser method that parses that sub-rule.
const ruleMethodMap: { [key: string]: (parser: Parser) => ParseResult<any> } = {
  'ref': parser => parser.parseReference(),
};
// Maps primitive type names used in grammar strings (e.g. "[levels:int]")
// to the token type that satisfies them.
const primitiveTypeMap: { [key: string]: TokenType } = {
  'int': TokenType.Number,
  'string': TokenType.String,
};
/** One executable step of a compiled grammar, interpreted by Parser.parseFragment. */
type RuleStep =
  // Exactly one of `idents` must appear; its lexeme is captured.
  | { type: 'choice', idents: string[], captureAs: string }
  // Delegate to a parser method; its result node is captured.
  | { type: 'sub-rule', execRule: (parser: Parser) => ParseResult<any>, captureAs: string }
  // A literal word that must appear verbatim (not captured).
  | { type: 'keyword', lexeme: string }
  // A single token of `valueType`; the token is captured.
  | { type: 'value', valueType: TokenType, captureAs: string }
  // One or more `itemType` tokens separated by `separator`; lexemes are captured.
  | { type: 'list', itemType: TokenType, separator: TokenType, captureAs: string };
| function compileGrammar(grammar: string): RuleStep[] { | |
| const steps: RuleStep[] = []; | |
| const parts = grammar.split(' '); | |
| for (const part of parts) { | |
| // Sub-rule: $(captureName:ruleName) | |
| let match = part.match(/^\$\((\w+):(\w+)\)$/); | |
| if (match) { | |
| steps.push({ type: 'sub-rule', execRule: ruleMethodMap[match[2]], captureAs: match[1] }); | |
| continue; | |
| } | |
| // List: [captureName:itemType] | |
| match = part.match(/^\[(\w+):(\w+)\]$/); | |
| if (match) { | |
| steps.push({ type: 'list', itemType: primitiveTypeMap[match[2]], separator: TokenType.Comma, captureAs: match[1] }); | |
| continue; | |
| } | |
| // value: captureName:itemType | |
| match = part.match(/^(\w+):(\w+)$/); | |
| if (match) { | |
| steps.push({ type: 'value', valueType: primitiveTypeMap[match[2]], captureAs: match[1] }); | |
| continue; | |
| } | |
| // Choice: keyword1|keyword2 | |
| if (part.includes('|')) { | |
| steps.push({ type: 'choice', idents: part.split('|'), captureAs: 'kind' }); | |
| continue; | |
| } | |
| // Default to keyword | |
| steps.push({ type: 'keyword', lexeme: part }); | |
| } | |
| return steps; | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment