Skip to content

Instantly share code, notes, and snippets.

@azur1s
Created April 7, 2024 13:11
Show Gist options
  • Select an option

  • Save azur1s/c64e4414d75fdbc88170cdc59eda11d3 to your computer and use it in GitHub Desktop.

Select an option

Save azur1s/c64e4414d75fdbc88170cdc59eda11d3 to your computer and use it in GitHub Desktop.
use chumsky::prelude::*;
use crate::expr::*;
/// Span type used throughout the lexer and parser: a chumsky `SimpleSpan` over `usize` offsets.
pub type Span = SimpleSpan<usize>;
/// A value paired with a [`Span`], as a `(value, span)` tuple.
pub type Spanned<T> = (T, Span);
/// A lexical token.
///
/// Variants that carry text borrow it from the source string (`'src`).
/// A string-literal variant (`Str`) existed in an earlier draft and is currently disabled.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'src> {
    /// Numeric literal.
    Num(f64),
    /// Symbol / identifier text.
    Sym(&'src str),
    /// Opening delimiter, carrying the delimiter character.
    Open(char),
    /// Closing delimiter, carrying the delimiter character.
    Close(char),

    // Arithmetic operators.
    Add,
    Sub,
    Mul,
    Div,

    // Comparison operators.
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,

    // Punctuation.
    Arrow,
    Assign,
    Colon,
    Semi,
    Comma,

    // Keywords.
    Let,
    In,
    Val,
    Type,
    Func,
}

impl<'src> std::fmt::Display for Token<'src> {
    /// Writes the token's textual form: payload-carrying variants print their payload, every
    /// other variant prints its fixed spelling.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let spelling = match self {
            // Payload variants are formatted directly and return early.
            Token::Num(value) => return write!(f, "{}", value),
            Token::Sym(name) => return write!(f, "{}", name),
            Token::Open(delim) | Token::Close(delim) => return write!(f, "{}", delim),

            Token::Add => "+",
            Token::Sub => "-",
            Token::Mul => "*",
            Token::Div => "/",

            Token::Eq => "==",
            Token::Ne => "!=",
            Token::Lt => "<",
            Token::Le => "<=",
            Token::Gt => ">",
            Token::Ge => ">=",

            Token::Arrow => "->",
            Token::Assign => "=",
            Token::Colon => ":",
            Token::Semi => ";",
            Token::Comma => ",",

            Token::Let => "let",
            Token::In => "in",
            Token::Val => "val",
            Token::Type => "type",
            Token::Func => "fn",
        };
        f.write_str(spelling)
    }
}
/// Builds the character-level lexer.
///
/// The returned parser turns source text into a vector of `(Token, Span)` pairs. Alternatives
/// are tried in this order: numbers, identifiers/keywords, two-character operators, delimiters,
/// then single-character operators. Two-character operators come first so that `->`, `==`,
/// `!=`, `<=` and `>=` are not split into their one-character prefixes.
///
/// Whitespace and `#` comments (running up to, but not including, the next `'\n'`) around
/// tokens are skipped. On a lexing failure, recovery is delegated to `skip_then_retry_until`,
/// skipping characters and retrying up to end of input.
pub fn lexer<'src>()
-> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
    // Integer part with an optional `.digits` fraction; the matched slice is parsed as `f64`.
    // NOTE(review): `unwrapped()` panics if `from_str` fails — the matched text is digits with
    // an optional fraction, so this is expected to always succeed; confirm.
    let number = text::int(10)
        .then(just('.').then(text::digits(10)).or_not())
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Num);

    // ASCII identifiers; reserved words become keyword tokens, everything else a symbol.
    let word = text::ascii::ident().map(|name: &str| match name {
        "let" => Token::Let,
        "in" => Token::In,
        "val" => Token::Val,
        "type" => Token::Type,
        "fn" => Token::Func,
        other => Token::Sym(other),
    });

    let two_char = choice((
        just("->").to(Token::Arrow),
        just("==").to(Token::Eq),
        just("!=").to(Token::Ne),
        just("<=").to(Token::Le),
        just(">=").to(Token::Ge),
    ));

    let delimiter = choice((
        just('(').to(Token::Open('(')),
        just(')').to(Token::Close(')')),
        just('[').to(Token::Open('[')),
        just(']').to(Token::Close(']')),
        just('{').to(Token::Open('{')),
        just('}').to(Token::Close('}')),
    ));

    let one_char = choice((
        just('+').to(Token::Add),
        just('-').to(Token::Sub),
        just('*').to(Token::Mul),
        just('/').to(Token::Div),
        just('<').to(Token::Lt),
        just('>').to(Token::Gt),
        just('=').to(Token::Assign),
        just(':').to(Token::Colon),
        just(';').to(Token::Semi),
        just(',').to(Token::Comma),
    ));

    // `#` followed by every character that is not a newline, with surrounding whitespace.
    let line_comment = just("#")
        .then(any().and_is(just('\n').not()).repeated())
        .padded();

    choice((number, word, two_char, delimiter, one_char))
        .map_with(|tok, extra| (tok, extra.span()))
        .padded_by(line_comment.repeated())
        .padded()
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}
/// Runs [`lexer`] over `src`.
///
/// Returns the lexer output (if any was produced) together with the errors collected while
/// lexing, as split by `into_output_errors`.
pub fn lex(src: &str) -> (Option<Vec<(Token, Span)>>, Vec<Rich<char, Span>>) {
    let outcome = lexer().parse(src);
    outcome.into_output_errors()
}
/// Input type of the token-level parsers: a borrowed slice of `(Token, Span)` pairs wrapped in
/// chumsky's `SpannedInput`, so each token is presented together with its [`Span`].
type ParserInput<'tokens, 'src> =
chumsky::input::SpannedInput<Token<'src>, Span, &'tokens [(Token<'src>, Span)]>;
/// Builds the parser for type annotations, producing a spanned [`Type`].
///
/// Two shapes are accepted, tried in this order:
///
/// * function types: `(T, U, ...) -> R`
/// * a symbol, optionally followed by `<T, U, ...>` argument lists; the symbol `Num` is
///   `Type::Num`, any other symbol is `Type::Var`.
///
/// Type lists are comma-separated and may end with a trailing comma.
fn type_parser<'tokens, 'src: 'tokens>() -> impl Parser<
    'tokens,
    ParserInput<'tokens, 'src>,
    Spanned<Type<'src>>,
    extra::Err<Rich<'tokens, Token<'src>, Span>>,
> + Clone {
    let unspanned = recursive(|ty| {
        // `T, U, ...` with an optional trailing comma.
        let type_list = ty
            .clone()
            .separated_by(just(Token::Comma))
            .allow_trailing()
            .collect::<Vec<_>>();

        // The `"Num"` arm must precede the catch-all symbol arm.
        let base = select! {
            Token::Sym("Num") => Type::Num,
            Token::Sym(name) => Type::Var(name),
        };

        // A base type followed by zero or more `<...>` argument lists.
        let applied = base.foldl_with(
            type_list
                .clone()
                .delimited_by(just(Token::Lt), just(Token::Gt))
                .repeated(),
            |head, args, _| match head {
                Type::Var(name) => Type::Constructor { name, args },
                // NOTE(review): arguments applied to anything other than a plain `Type::Var`
                // (e.g. `Num<...>`, or a second `<...>` list after a constructor) collapse to
                // `Type::Var("")` without reporting an error — confirm this is intended.
                _ => Type::Var(""),
            },
        );

        // `(args) -> ret`
        let function = type_list
            .delimited_by(just(Token::Open('(')), just(Token::Close(')')))
            .then_ignore(just(Token::Arrow))
            .then(ty)
            .map(|(args, ret)| Type::Func {
                args,
                ret: Box::new(ret),
            });

        function.or(applied)
    });

    // The span is attached once, around the outermost type.
    unspanned.map_with(|ty, extra| (ty, extra.span()))
}
/// Builds the token-level expression parser, producing a spanned [`Expr`].
///
/// Internally the grammar is layered from tightest to loosest binding:
/// atom -> call -> product (`*`, `/`) -> sum (`+`, `-`) -> comparison -> binding.
///
/// NOTE(review): the parser returned from the `recursive` closure is only the binding form
/// `name arg* = <comparison>` (the `.or(cmp)` alternative on the last line is commented out).
/// Every use of the recursive `expr` handle below — list items, lambda bodies, call
/// arguments — therefore also requires that binding form. Confirm this is intended.
fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser<
'tokens,
ParserInput<'tokens, 'src>,
Spanned<Expr<'src>>,
extra::Err<Rich<'tokens, Token<'src>, Span>>,
> + Clone {
recursive(|expr| {
// A symbol token, yielding its text.
let id = select! { Token::Sym(s) => s };
// Zero or more symbols (parameter names of a binding).
let args = id.clone().repeated().collect::<Vec<_>>();
// Comma-separated expressions with an optional trailing comma.
let items = expr.clone()
.separated_by(just(Token::Comma))
.allow_trailing()
.collect::<Vec<_>>();
// Literal values; currently only numbers.
let value = select! {
Token::Num(n) => Expr::Num(n),
};
// `[a, b, ...]`
let list = items.clone()
.delimited_by(just(Token::Open('[')), just(Token::Close(']')))
.map(Expr::List);
// `fn a b ... -> body`
let lam = just(Token::Func)
.ignore_then(id.clone().repeated().collect::<Vec<_>>())
.then_ignore(just(Token::Arrow))
.then(expr.clone())
.map(|(args, body)| Expr::Lam {
args,
body: Box::new(body),
});
// Disabled: keyword-introduced `let name args = body [in then]` form.
// let let_ = just(Token::Let)
// .ignore_then(id.clone())
// .then(args.clone())
// .then_ignore(just(Token::Assign))
// .then(expr.clone())
// .then(
// just(Token::In)
// .ignore_then(expr.clone())
// .or_not())
// .map(|(((name, args), body), then)| Expr::Let {
// name, args,
// body: Box::new(body),
// then: then.map(Box::new),
// });
// `val name : type` — the type is parsed by `type_parser`.
let val = just(Token::Val)
.ignore_then(id.clone())
.then_ignore(just(Token::Colon))
.then(type_parser())
.map(|(name, ty)| Expr::Val {
name,
ty: Box::new(ty),
});
// Atoms: literal, variable, list, lambda or `val` declaration, each tagged with its span.
// The two recovery strategies use `nested_delimiters` with `(`..`)` and `[`..`]` as the
// primary pair respectively (the other bracket kinds are listed as additional pairs) and
// substitute `Expr::Error` carrying the span handed to the fallback closure.
let atom = value
.or(id.map(Expr::Var))
.or(list)
.or(lam)
// .or(let_)
.or(val)
.map_with(|expr, e| (expr, e.span()))
.recover_with(via_parser(nested_delimiters(
Token::Open('('),
Token::Close(')'),
[
(Token::Open('['), Token::Close(']')),
(Token::Open('{'), Token::Close('}')),
],
|span| (Expr::Error, span),
)))
.recover_with(via_parser(nested_delimiters(
Token::Open('['),
Token::Close(']'),
[
(Token::Open('('), Token::Close(')')),
(Token::Open('{'), Token::Close('}')),
],
|span| (Expr::Error, span),
)))
.boxed();
// Calls: an atom followed by any number of `(args)` lists, folded left so that
// `f(a)(b)` becomes a call whose `func` is the call `f(a)`.
let call = atom.foldl_with(
items
.delimited_by(just(Token::Open('(')), just(Token::Close(')')))
.map_with(|args, e| (args, e.span()))
.repeated(),
|f, args, e| (Expr::Call {
func: Box::new(f),
args,
}, e.span())
);
// Multiplicative level: `*` and `/`, folded left.
let op = choice((
just(Token::Mul).to(BinOp::Mul),
just(Token::Div).to(BinOp::Div),
));
let product = call
.clone()
.foldl_with(op.then(call).repeated(), |a, (op, b), e|
(Expr::Bin {
op,
lhs: Box::new(a),
rhs: Box::new(b),
}, e.span())
);
// Additive level: `+` and `-`, folded left over products.
let op = choice((
just(Token::Add).to(BinOp::Add),
just(Token::Sub).to(BinOp::Sub),
));
let sum = product
.clone()
.foldl_with(op.then(product).repeated(), |a, (op, b), e|
(Expr::Bin {
op,
lhs: Box::new(a),
rhs: Box::new(b),
}, e.span())
);
// Comparison level: `==`, `!=`, `<`, `<=`, `>`, `>=`, folded left over sums.
let op = choice((
just(Token::Eq).to(BinOp::Eq),
just(Token::Ne).to(BinOp::Ne),
just(Token::Lt).to(BinOp::Lt),
just(Token::Le).to(BinOp::Le),
just(Token::Gt).to(BinOp::Gt),
just(Token::Ge).to(BinOp::Ge),
));
let cmp = sum
.clone()
.foldl_with(op.then(sum).repeated(), |a, (op, b), e|
(Expr::Bin {
op,
lhs: Box::new(a),
rhs: Box::new(b),
}, e.span())
);
// Binding without a keyword: `id a b = expr`. The body is a comparison-level expression
// and `then` is always `None` in this form.
let let_ = id.clone()
.then(args.clone())
.then_ignore(just(Token::Assign))
.then(cmp.clone())
.map_with(|((name, args), body), e| (Expr::Let {
name, args,
body: Box::new(body),
then: None,
}, e.span()));
// Only the binding form is returned; falling back to a bare comparison is disabled.
let_ // .or(cmp)
})
}
/// Builds the parser for a whole token stream: zero or more expressions from
/// [`expr_parser`], each followed by a `;`, collected into a vector of spanned expressions.
fn exprs_parser<'tokens, 'src: 'tokens>() -> impl Parser<
    'tokens,
    ParserInput<'tokens, 'src>,
    Vec<(Expr<'src>, Span)>,
    extra::Err<Rich<'tokens, Token<'src>, Span>>,
> + Clone {
    // One expression plus its terminating semicolon; the semicolon itself is discarded.
    let statement = expr_parser().then_ignore(just(Token::Semi));
    statement.repeated().collect::<Vec<_>>()
}
/// Parses a lexed token slice into a list of spanned expressions.
///
/// `len` is used to build the zero-width span `len..len` that is handed to the input as its
/// end-of-input span (presumably the length of the original source text — confirm with
/// callers).
///
/// Returns the parsed expressions paired with the span covering them (if any output was
/// produced), together with the errors collected while parsing.
pub fn parse<'tokens, 'src: 'tokens>(tokens: &'tokens [(Token<'src>, Span)], len: usize)
-> (Option<Spanned<Vec<(Expr<'src>, Span)>>>, Vec<Rich<'tokens, Token<'src>, Span>>) {
    let end_of_input: Span = (len..len).into();
    let program = exprs_parser().map_with(|exprs, extra| (exprs, extra.span()));
    program
        .parse(tokens.spanned(end_of_input))
        .into_output_errors()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment