Created
April 7, 2024 13:11
-
-
Save azur1s/c64e4414d75fdbc88170cdc59eda11d3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use chumsky::prelude::*; | |
| use crate::expr::*; | |
| pub type Span = SimpleSpan<usize>; | |
| pub type Spanned<T> = (T, Span); | |
/// A lexical token. Symbol tokens borrow their text from the source string (`'src`).
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'src> {
    /// Numeric literal, stored as `f64`.
    Num(f64),
    // NOTE: a string-literal variant (`Str(&'src str)`) is currently disabled.
    /// Identifier that is not one of the keywords below.
    Sym(&'src str),
    /// Opening delimiter; carries the delimiter character.
    Open(char),
    /// Closing delimiter; carries the delimiter character.
    Close(char),

    // Arithmetic operators.
    Add,
    Sub,
    Mul,
    Div,

    // Comparison operators.
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,

    // Punctuation.
    Arrow,
    Assign,
    Colon,
    Semi,
    Comma,

    // Keywords.
    Let,
    In,
    Val,
    Type,
    Func,
}

/// Renders a token the way it is spelled in source code.
impl<'src> std::fmt::Display for Token<'src> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Variants carrying a non-string payload are formatted directly; every
        // other variant maps to a fixed (or borrowed) spelling written at the end.
        let spelling: &str = match self {
            Token::Num(value) => return write!(f, "{}", value),
            Token::Open(delim) | Token::Close(delim) => return write!(f, "{}", delim),
            Token::Sym(name) => *name,
            Token::Add => "+",
            Token::Sub => "-",
            Token::Mul => "*",
            Token::Div => "/",
            Token::Eq => "==",
            Token::Ne => "!=",
            Token::Lt => "<",
            Token::Le => "<=",
            Token::Gt => ">",
            Token::Ge => ">=",
            Token::Arrow => "->",
            Token::Assign => "=",
            Token::Colon => ":",
            Token::Semi => ";",
            Token::Comma => ",",
            Token::Let => "let",
            Token::In => "in",
            Token::Val => "val",
            Token::Type => "type",
            Token::Func => "fn",
        };
        f.write_str(spelling)
    }
}
| pub fn lexer<'src>() | |
| -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> { | |
| let num = text::int(10) | |
| .then(just('.').then(text::digits(10)).or_not()) | |
| .to_slice() | |
| .from_str() | |
| .unwrapped() | |
| .map(Token::Num); | |
| let id = text::ascii::ident().map(|id: &str| match id { | |
| "let" => Token::Let, | |
| "in" => Token::In, | |
| "val" => Token::Val, | |
| "type" => Token::Type, | |
| "fn" => Token::Func, | |
| _ => Token::Sym(id), | |
| }); | |
| let op = choice(( | |
| just("->").to(Token::Arrow), | |
| just("==").to(Token::Eq), | |
| just("!=").to(Token::Ne), | |
| just("<=").to(Token::Le), | |
| just(">=").to(Token::Ge), | |
| just('(').to(Token::Open('(')), | |
| just(')').to(Token::Close(')')), | |
| just('[').to(Token::Open('[')), | |
| just(']').to(Token::Close(']')), | |
| just('{').to(Token::Open('{')), | |
| just('}').to(Token::Close('}')), | |
| just('+').to(Token::Add), | |
| just('-').to(Token::Sub), | |
| just('*').to(Token::Mul), | |
| just('/').to(Token::Div), | |
| just('<').to(Token::Lt), | |
| just('>').to(Token::Gt), | |
| just('=').to(Token::Assign), | |
| just(':').to(Token::Colon), | |
| just(';').to(Token::Semi), | |
| just(',').to(Token::Comma), | |
| )); | |
| let token = num.or(id).or(op); | |
| let comment = just("#") | |
| .then(any().and_is(just('\n').not()).repeated()) | |
| .padded(); | |
| token | |
| .map_with(|t, e| (t, e.span())) | |
| .padded_by(comment.repeated()) | |
| .padded() | |
| .recover_with(skip_then_retry_until(any().ignored(), end())) | |
| .repeated() | |
| .collect() | |
| } | |
| pub fn lex(src: &str) -> (Option<Vec<(Token, Span)>>, Vec<Rich<char, Span>>) { | |
| lexer().parse(src).into_output_errors() | |
| } | |
| type ParserInput<'tokens, 'src> = | |
| chumsky::input::SpannedInput<Token<'src>, Span, &'tokens [(Token<'src>, Span)]>; | |
| fn type_parser<'tokens, 'src: 'tokens>() -> impl Parser< | |
| 'tokens, | |
| ParserInput<'tokens, 'src>, | |
| Spanned<Type<'src>>, | |
| extra::Err<Rich<'tokens, Token<'src>, Span>>, | |
| > + Clone { | |
| recursive(|ty| { | |
| let items = ty.clone() | |
| .separated_by(just(Token::Comma)) | |
| .allow_trailing() | |
| .collect::<Vec<_>>(); | |
| let val = select! { | |
| Token::Sym("Num") => Type::Num, | |
| Token::Sym(s) => Type::Var(s), | |
| }; | |
| let constructor = val | |
| .foldl_with( | |
| items.clone() | |
| .delimited_by(just(Token::Lt), just(Token::Gt)) | |
| .repeated(), | |
| |name, args, _| match name { | |
| Type::Var(name) => Type::Constructor { name, args }, | |
| _ => Type::Var(""), | |
| }, | |
| ); | |
| let func = items | |
| .delimited_by(just(Token::Open('(')), just(Token::Close(')'))) | |
| .then_ignore(just(Token::Arrow)) | |
| .then(ty.clone()) | |
| .map(|(args, ret)| Type::Func { | |
| args, | |
| ret: Box::new(ret), | |
| }); | |
| func | |
| .or(constructor) | |
| }).map_with(|ty, e| (ty, e.span())) | |
| } | |
| fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< | |
| 'tokens, | |
| ParserInput<'tokens, 'src>, | |
| Spanned<Expr<'src>>, | |
| extra::Err<Rich<'tokens, Token<'src>, Span>>, | |
| > + Clone { | |
| recursive(|expr| { | |
| let id = select! { Token::Sym(s) => s }; | |
| let args = id.clone().repeated().collect::<Vec<_>>(); | |
| let items = expr.clone() | |
| .separated_by(just(Token::Comma)) | |
| .allow_trailing() | |
| .collect::<Vec<_>>(); | |
| let value = select! { | |
| Token::Num(n) => Expr::Num(n), | |
| }; | |
| let list = items.clone() | |
| .delimited_by(just(Token::Open('[')), just(Token::Close(']'))) | |
| .map(Expr::List); | |
| let lam = just(Token::Func) | |
| .ignore_then(id.clone().repeated().collect::<Vec<_>>()) | |
| .then_ignore(just(Token::Arrow)) | |
| .then(expr.clone()) | |
| .map(|(args, body)| Expr::Lam { | |
| args, | |
| body: Box::new(body), | |
| }); | |
| // let let_ = just(Token::Let) | |
| // .ignore_then(id.clone()) | |
| // .then(args.clone()) | |
| // .then_ignore(just(Token::Assign)) | |
| // .then(expr.clone()) | |
| // .then( | |
| // just(Token::In) | |
| // .ignore_then(expr.clone()) | |
| // .or_not()) | |
| // .map(|(((name, args), body), then)| Expr::Let { | |
| // name, args, | |
| // body: Box::new(body), | |
| // then: then.map(Box::new), | |
| // }); | |
| let val = just(Token::Val) | |
| .ignore_then(id.clone()) | |
| .then_ignore(just(Token::Colon)) | |
| .then(type_parser()) | |
| .map(|(name, ty)| Expr::Val { | |
| name, | |
| ty: Box::new(ty), | |
| }); | |
| let atom = value | |
| .or(id.map(Expr::Var)) | |
| .or(list) | |
| .or(lam) | |
| // .or(let_) | |
| .or(val) | |
| .map_with(|expr, e| (expr, e.span())) | |
| .recover_with(via_parser(nested_delimiters( | |
| Token::Open('('), | |
| Token::Close(')'), | |
| [ | |
| (Token::Open('['), Token::Close(']')), | |
| (Token::Open('{'), Token::Close('}')), | |
| ], | |
| |span| (Expr::Error, span), | |
| ))) | |
| .recover_with(via_parser(nested_delimiters( | |
| Token::Open('['), | |
| Token::Close(']'), | |
| [ | |
| (Token::Open('('), Token::Close(')')), | |
| (Token::Open('{'), Token::Close('}')), | |
| ], | |
| |span| (Expr::Error, span), | |
| ))) | |
| .boxed(); | |
| let call = atom.foldl_with( | |
| items | |
| .delimited_by(just(Token::Open('(')), just(Token::Close(')'))) | |
| .map_with(|args, e| (args, e.span())) | |
| .repeated(), | |
| |f, args, e| (Expr::Call { | |
| func: Box::new(f), | |
| args, | |
| }, e.span()) | |
| ); | |
| let op = choice(( | |
| just(Token::Mul).to(BinOp::Mul), | |
| just(Token::Div).to(BinOp::Div), | |
| )); | |
| let product = call | |
| .clone() | |
| .foldl_with(op.then(call).repeated(), |a, (op, b), e| | |
| (Expr::Bin { | |
| op, | |
| lhs: Box::new(a), | |
| rhs: Box::new(b), | |
| }, e.span()) | |
| ); | |
| let op = choice(( | |
| just(Token::Add).to(BinOp::Add), | |
| just(Token::Sub).to(BinOp::Sub), | |
| )); | |
| let sum = product | |
| .clone() | |
| .foldl_with(op.then(product).repeated(), |a, (op, b), e| | |
| (Expr::Bin { | |
| op, | |
| lhs: Box::new(a), | |
| rhs: Box::new(b), | |
| }, e.span()) | |
| ); | |
| let op = choice(( | |
| just(Token::Eq).to(BinOp::Eq), | |
| just(Token::Ne).to(BinOp::Ne), | |
| just(Token::Lt).to(BinOp::Lt), | |
| just(Token::Le).to(BinOp::Le), | |
| just(Token::Gt).to(BinOp::Gt), | |
| just(Token::Ge).to(BinOp::Ge), | |
| )); | |
| let cmp = sum | |
| .clone() | |
| .foldl_with(op.then(sum).repeated(), |a, (op, b), e| | |
| (Expr::Bin { | |
| op, | |
| lhs: Box::new(a), | |
| rhs: Box::new(b), | |
| }, e.span()) | |
| ); | |
| // id a b = expr | |
| let let_ = id.clone() | |
| .then(args.clone()) | |
| .then_ignore(just(Token::Assign)) | |
| .then(cmp.clone()) | |
| .map_with(|((name, args), body), e| (Expr::Let { | |
| name, args, | |
| body: Box::new(body), | |
| then: None, | |
| }, e.span())); | |
| let_ // .or(cmp) | |
| }) | |
| } | |
| fn exprs_parser<'tokens, 'src: 'tokens>() -> impl Parser< | |
| 'tokens, | |
| ParserInput<'tokens, 'src>, | |
| Vec<(Expr<'src>, Span)>, | |
| extra::Err<Rich<'tokens, Token<'src>, Span>>, | |
| > + Clone { | |
| expr_parser().clone() | |
| .then_ignore(just(Token::Semi)) | |
| .repeated() | |
| .collect::<Vec<_>>() | |
| } | |
| pub fn parse<'tokens, 'src: 'tokens>(tokens: &'tokens [(Token<'src>, Span)], len: usize) | |
| -> (Option<Spanned<Vec<(Expr<'src>, Span)>>>, Vec<Rich<'tokens, Token<'src>, Span>>) { | |
| let (ast, errs) = exprs_parser() | |
| .map_with(|ast, e| (ast, e.span())) | |
| .parse(tokens.spanned((len..len).into())) | |
| .into_output_errors(); | |
| (ast, errs) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment