Last active
June 22, 2021 22:42
-
-
Save eignnx/3c8444b8e2f4d8ce10fcd97815f29d2e to your computer and use it in GitHub Desktop.
An idea for how to handle whitespace in Rust's `nom` parser-combinator library.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /// A module for parsing whitespace. Takes into account comments too. | |
| /// | |
| /// # Module Outline | |
| /// - mod space | |
| /// - fn comment | |
| /// - mod allowed | |
| /// - fn here | |
| /// - fn after | |
| /// - fn before | |
| /// - fn around | |
| /// - mod required | |
| /// - fn here | |
| /// - fn after | |
| /// - fn before | |
| /// - fn around | |
| /// | |
| /// The module structure allows semantic whitespace handling while constructing parsers. | |
| /// | |
| /// # Example | |
| /// | |
| /// ```rust | |
| /// let lisp_sexp = delimited( | |
| /// char('('), | |
| /// space::allowed::around( | |
| /// separated_nonempty_list(space::required::here, some_parser), | |
| /// ), | |
| /// char(')'), | |
| /// ); | |
| /// | |
| /// let rust_fn_definition = preceded( | |
| /// space::required::after(tag("fn")), | |
| /// tuple(( | |
| /// space::allowed::after(ident), | |
| /// delimited(char('('), space::allowed::around(param_list), char(')')), | |
| /// preceded( | |
| /// space::allowed::around(tag("->")), | |
| /// delimited(char('{'), space::allowed::around(block_interior), char('}')), | |
| /// ), | |
| /// )), | |
| /// ); | |
| /// | |
| /// let source_file = terminated( | |
| /// space::allowed::around(separated_list( | |
| /// space::allowed::here, | |
| /// alt((impl_block, fn_definition, trait_definition, type_definition)), | |
| /// )), | |
| /// eof, | |
| /// ); | |
| /// ``` | |
| mod space { | |
| use nom::{ | |
| branch::alt, | |
| bytes::complete::{tag, take_till}, | |
| character::complete::{multispace0, multispace1}, | |
| combinator::recognize, | |
| error::ParseError, | |
| multi::many1, | |
| sequence::{delimited, preceded, terminated}, | |
| IResult, | |
| }; | |
| /// A comment starts with `//` and continues till the end of the line, or | |
| /// end of input, whichever comes first. Note: this parser explicitly does | |
| /// NOT consume the '\n' character at the end of lines. | |
| pub fn comment<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
| where | |
| E: ParseError<&'i str> | |
| { | |
| let (i, _) = tag("//")(i)?; | |
| let (i, content) = take_till(|ch| ch == '\n')(i)?; | |
| // Strip off the first space if it has one. | |
| if content.starts_with(' ') { | |
| Ok((i, &content[1..])) | |
| } else { | |
| Ok((i, content)) | |
| } | |
| } | |
| pub mod allowed { | |
| use super::*; | |
| /// Whitespace is allowed here, but not required. | |
| pub fn here<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
| where | |
| E: ParseError<&'i str> | |
| { | |
| alt((super::required::here, multispace0))(i) | |
| } | |
| /// Has potentially-empty whitespace before **and** after the captured parser. | |
| pub fn around<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| delimited(here, &parser, here)(i) | |
| } | |
| /// Has potentially-empty whitespace after the captured parser. | |
| pub fn after<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| terminated(&parser, here)(i) | |
| } | |
| /// Has potentially-empty whitespace before the captured parser. | |
| pub fn before<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| preceded(here, &parser)(i) | |
| } | |
| } | |
| pub mod required { | |
| use super::*; | |
| /// Whitespace is required here. | |
| pub fn here<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
| where | |
| E: ParseError<&'i str> | |
| { | |
| recognize(many1(alt((multispace1, comment))))(i) | |
| } | |
| /// Has potentially-empty whitespace before **and** after the captured parser. | |
| pub fn around<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| delimited(here, &parser, here)(i) | |
| } | |
| /// Has potentially-empty whitespace after the captured parser. | |
| pub fn after<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| terminated(&parser, here)(i) | |
| } | |
| /// Has potentially-empty whitespace before the captured parser. | |
| pub fn before<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| preceded(here, &parser)(i) | |
| } | |
| } | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@Shuumatsu Wow didn't know anybody was using this code haha! Um, I'm not sure, but you could try changing all of the `Fn(_) -> _` types in this gist to `FnMut(_) -> _` types. That might solve the problem. I think in `nom` version 6 they basically did the same change in their codebase (allowing `FnMut` types as parsers), so that's what makes me think it might work here.

When I get a chance I'll try this change out in my codebase and, if it works, I'll update the gist. ✌️