Skip to content

Instantly share code, notes, and snippets.

@millerdev
Last active April 3, 2024 14:50
Show Gist options
  • Select an option

  • Save millerdev/03d881620dd0879d4347b0e971ff0be5 to your computer and use it in GitHub Desktop.

Select an option

Save millerdev/03d881620dd0879d4347b0e971ff0be5 to your computer and use it in GitHub Desktop.

Revisions

  1. millerdev revised this gist Apr 3, 2024. 1 changed file with 23 additions and 0 deletions.
    23 changes: 23 additions & 0 deletions prattparse.py
    Original file line number Diff line number Diff line change
    @@ -1,8 +1,27 @@
    # Pratt Parser in Python
    # https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
    #
    # Works with input like `a + b`.
    # The lexer does not support numbers (yet).
    #
    # https://gist.github.com/millerdev/03d881620dd0879d4347b0e971ff0be5
    from dataclasses import dataclass


    def main():
    while True:
    print("Type an expression:")
    try:
    expr = input("> ")
    if not expr:
    raise EOFError
    except EOFError:
    print()
    break
    print("AST:", parse(lex(expr)))
    print()


    def parse(tokens):
    parser = Parser(tokens)
    return parse_expression(parser, 0)
    @@ -186,3 +205,7 @@ class Token:

    class ParseError(Exception):
    ...


    if __name__ == "__main__":
    main()
  2. millerdev revised this gist Apr 3, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion prattparse.py
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,4 @@
    # Python implementation of code from
    # Pratt Parser in Python
    # https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
    from dataclasses import dataclass

  3. millerdev created this gist Apr 3, 2024.
    188 changes: 188 additions & 0 deletions prattparse.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,188 @@
    # Python implementation of code from
    # https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
    from dataclasses import dataclass


    def parse(tokens):
    parser = Parser(tokens)
    return parse_expression(parser, 0)


    def parse_expression(parser, precedence):
    token = parser.consume()
    parse_prefix = PREFIX_PARSERS.get(token.type)
    if parse_prefix is None:
    raise ParseError(f"Could not parse: {token}")
    left = parse_prefix(parser, token)

    while precedence < get_precedence(parser):
    token = parser.consume()
    parse_infix = INFIX_PARSERS[token.type]
    left = parse_infix(parser, left, token)

    return left


    def get_precedence(parser):
    token = parser.look_ahead()
    parse = INFIX_PARSERS.get(token.type)
    return 0 if parse is None else parse.precedence


    def parse_name(parser, token):
    return NameExpression(token.value)


    def parse_prefix_expression(parser, token, *, precedence):
    operand = parse_expression(parser, precedence)
    return PrefixExpression(token.value, operand)


    def parse_binary_expression(parser, left, token, *, precedence):
    right = parse_expression(parser, precedence)
    return OperatorExpression(left, token.type, right)


    def parse_postfix_expression(parser, left, token, *, precedence):
    return PostfixExpression(left, token.type)


    def parse_conditional_expression(parser, left, token, *, precedence):
    then_arm = parse_expression(parser, precedence)
    token = parser.consume()
    assert token.type == ':', token
    else_arm = parse_expression(parser, precedence)
    return ConditionalExpression(left, then_arm, else_arm)


    class Expression:
    ...


    @dataclass
    class ConditionalExpression(Expression):
    condition: Expression
    then_arm: Expression
    else_arm: Expression


    @dataclass
    class NameExpression(Expression):
    name: str


    @dataclass
    class PrefixExpression(Expression):
    operator: str
    operand: str


    @dataclass
    class OperatorExpression(Expression):
    left: Expression
    operator: str
    right: Expression


    @dataclass
    class PostfixExpression(Expression):
    left: Expression
    operator: str


    class Parser:
    def __init__(self, tokens):
    self.tokens = tokens
    self.peeked = None

    def consume(self):
    if self.peeked is None:
    return next(self.tokens)
    token = self.peeked
    self.peeked = None
    return token

    def look_ahead(self):
    if self.peeked is None:
    self.peeked = next(self.tokens)
    return self.peeked


    def lex(text):
    i = 0
    while i < len(text):
    char = text[i]
    i += 1
    if char in PUNCTUATORS:
    yield Token(char, char)
    elif char.isidentifier():
    start = i - 1
    while i < len(text):
    if not text[i].isalnum():
    break
    i += 1
    name = text[start:i]
    assert name.isidentifier(), name
    yield Token(NAME, name)

    yield Token(EOF, "")


    PRECEDENCES = {
    '=': 1,
    '?': 2,
    '+': 3,
    '-': 3,
    '*': 4,
    '/': 4,
    '^': 5,
    'pre': 6,
    '!': 7,
    'call': 8,
    }


    def add_precedence(parse_expr, op):
    def parse(*args):
    return parse_expr(*args, precedence=value)
    parse.precedence = value = PRECEDENCES[op]
    return parse


    NAME = 'NAME'
    EOF = 'EOF'
    PUNCTUATORS = '(),=+-*/^~!?:'

    PREFIX_OPS = list('+-~!')
    BINARY_OPS = list('^*/+-=')


    def make_prefix_parsers():
    parsers = {}
    parsers[NAME] = parse_name
    for op in PREFIX_OPS:
    parsers[op] = add_precedence(parse_prefix_expression, 'pre')
    return parsers


    def make_infix_parsers():
    parsers = {}
    for op in BINARY_OPS:
    parsers[op] = add_precedence(parse_binary_expression, op)
    parsers['!'] = add_precedence(parse_postfix_expression, '!')
    parsers['?'] = add_precedence(parse_conditional_expression, '?')
    return parsers


    PREFIX_PARSERS = make_prefix_parsers()
    INFIX_PARSERS = make_infix_parsers()


    @dataclass
    class Token:
    type: str
    value: str


    class ParseError(Exception):
    ...