Last active
April 3, 2024 14:50
-
-
Save millerdev/03d881620dd0879d4347b0e971ff0be5 to your computer and use it in GitHub Desktop.
Revisions
-
millerdev revised this gist
Apr 3, 2024 . 1 changed file with 23 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,8 +1,27 @@ # Pratt Parser in Python # https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ # # Works with input like `a + b`. # The lexer does not support numbers (yet). # # https://gist.github.com/millerdev/03d881620dd0879d4347b0e971ff0be5 from dataclasses import dataclass def main(): while True: print("Type an expression:") try: expr = input("> ") if not expr: raise EOFError except EOFError: print() break print("AST:", parse(lex(expr))) print() def parse(tokens): parser = Parser(tokens) return parse_expression(parser, 0) @@ -186,3 +205,7 @@ class Token: class ParseError(Exception): ... if __name__ == "__main__": main() -
millerdev revised this gist
Apr 3, 2024 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,4 @@ # Pratt Parser in Python # https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ from dataclasses import dataclass -
millerdev created this gist
Apr 3, 2024 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,188 @@ # Python implementation of code from # https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ from dataclasses import dataclass def parse(tokens): parser = Parser(tokens) return parse_expression(parser, 0) def parse_expression(parser, precedence): token = parser.consume() parse_prefix = PREFIX_PARSERS.get(token.type) if parse_prefix is None: raise ParseError(f"Could not parse: {token}") left = parse_prefix(parser, token) while precedence < get_precedence(parser): token = parser.consume() parse_infix = INFIX_PARSERS[token.type] left = parse_infix(parser, left, token) return left def get_precedence(parser): token = parser.look_ahead() parse = INFIX_PARSERS.get(token.type) return 0 if parse is None else parse.precedence def parse_name(parser, token): return NameExpression(token.value) def parse_prefix_expression(parser, token, *, precedence): operand = parse_expression(parser, precedence) return PrefixExpression(token.value, operand) def parse_binary_expression(parser, left, token, *, precedence): right = parse_expression(parser, precedence) return OperatorExpression(left, token.type, right) def parse_postfix_expression(parser, left, token, *, precedence): return PostfixExpression(left, token.type) def parse_conditional_expression(parser, left, token, *, precedence): then_arm = parse_expression(parser, precedence) token = parser.consume() assert token.type == ':', token else_arm = parse_expression(parser, precedence) return ConditionalExpression(left, then_arm, else_arm) class Expression: ... @dataclass class ConditionalExpression(Expression): condition: Expression then_arm: Expression else_arm: Expression @dataclass class NameExpression(Expression): name: str @dataclass class PrefixExpression(Expression): operator: str operand: str @dataclass class OperatorExpression(Expression): left: Expression operator: str right: Expression @dataclass class PostfixExpression(Expression): left: Expression operator: str class Parser: def __init__(self, tokens): self.tokens = tokens self.peeked = None def consume(self): if self.peeked is None: return next(self.tokens) token = self.peeked self.peeked = None return token def look_ahead(self): if self.peeked is None: self.peeked = next(self.tokens) return self.peeked def lex(text): i = 0 while i < len(text): char = text[i] i += 1 if char in PUNCTUATORS: yield Token(char, char) elif char.isidentifier(): start = i - 1 while i < len(text): if not text[i].isalnum(): break i += 1 name = text[start:i] assert name.isidentifier(), name yield Token(NAME, name) yield Token(EOF, "") PRECEDENCES = { '=': 1, '?': 2, '+': 3, '-': 3, '*': 4, '/': 4, '^': 5, 'pre': 6, '!': 7, 'call': 8, } def add_precedence(parse_expr, op): def parse(*args): return parse_expr(*args, precedence=value) parse.precedence = value = PRECEDENCES[op] return parse NAME = 'NAME' EOF = 'EOF' PUNCTUATORS = '(),=+-*/^~!?:' PREFIX_OPS = list('+-~!') BINARY_OPS = list('^*/+-=') def make_prefix_parsers(): parsers = {} parsers[NAME] = parse_name for op in PREFIX_OPS: parsers[op] = add_precedence(parse_prefix_expression, 'pre') return parsers def make_infix_parsers(): parsers = {} for op in BINARY_OPS: parsers[op] = add_precedence(parse_binary_expression, op) parsers['!'] = add_precedence(parse_postfix_expression, '!') parsers['?'] = add_precedence(parse_conditional_expression, '?') return parsers PREFIX_PARSERS = make_prefix_parsers() INFIX_PARSERS = make_infix_parsers() @dataclass class Token: type: str value: str class ParseError(Exception): ...