Created
November 23, 2017 11:25
-
-
Save byzantic/e6f821b0b6683d9bb34ff0d5c761b4e1 to your computer and use it in GitHub Desktop.
Chemical Parser using Elm Applicative Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- Cambridge Elm Meetup problem, chemical formulae, | |
| -- implemented using Applicative Parser Combinators | |
| -- | |
| module AppChemParser exposing (..) | |
| import Char exposing (..) | |
| import Dict exposing (..) | |
| import Html exposing (Attribute, Html, beginnerProgram, div, input, text) | |
| import Html.Attributes exposing (..) | |
| import Html.Events exposing (onInput) | |
| import AppParser exposing (..) | |
| import String | |
| -- DICTIONARY | |
| -- for mapping Elemental symbols to Element names | |
| -- Maps an element symbol (e.g. "Fe") to the element's name (e.g. "Iron"). | |
| -- Elements 113, 115, 117 and 118 are listed under both their official | |
| -- IUPAC symbols (Nh, Mc, Ts, Og) and the older provisional symbols | |
| -- (Uut, Uup, Uus, Uuo); the provisional keys are kept so that existing | |
| -- lookups keep returning the same names as before. | |
| data : Dict String String | |
| data = | |
|     Dict.fromList | |
|         [ ( "Ac", "Actinium" ) | |
|         , ( "Ag", "Silver" ) | |
|         , ( "Al", "Aluminium" ) | |
|         , ( "Am", "Americium" ) | |
|         , ( "Ar", "Argon" ) | |
|         , ( "As", "Arsenic" ) | |
|         , ( "At", "Astatine" ) | |
|         , ( "Au", "Gold" ) | |
|         , ( "B", "Boron" ) | |
|         , ( "Ba", "Barium" ) | |
|         , ( "Be", "Beryllium" ) | |
|         , ( "Bh", "Bohrium" ) | |
|         , ( "Bi", "Bismuth" ) | |
|         , ( "Bk", "Berkelium" ) | |
|         , ( "Br", "Bromine" ) | |
|         , ( "C", "Carbon" ) | |
|         , ( "Ca", "Calcium" ) | |
|         , ( "Cd", "Cadmium" ) | |
|         , ( "Ce", "Cerium" ) | |
|         , ( "Cf", "Californium" ) | |
|         , ( "Cl", "Chlorine" ) | |
|         , ( "Cm", "Curium" ) | |
|         , ( "Cn", "Copernicium" ) | |
|         , ( "Co", "Cobalt" ) | |
|         , ( "Cr", "Chromium" ) | |
|         , ( "Cs", "Cesium" ) | |
|         , ( "Cu", "Copper" ) | |
|         , ( "Db", "Dubnium" ) | |
|         , ( "Ds", "Darmstadtium" ) | |
|         , ( "Dy", "Dysprosium" ) | |
|         , ( "Er", "Erbium" ) | |
|         , ( "Es", "Einsteinium" ) | |
|         , ( "Eu", "Europium" ) | |
|         , ( "F", "Fluorine" ) | |
|         , ( "Fe", "Iron" ) | |
|         , ( "Fl", "Flerovium" ) | |
|         , ( "Fm", "Fermium" ) | |
|         , ( "Fr", "Francium" ) | |
|         , ( "Ga", "Gallium" ) | |
|         , ( "Gd", "Gadolinium" ) | |
|         , ( "Ge", "Germanium" ) | |
|         , ( "H", "Hydrogen" ) | |
|         , ( "He", "Helium" ) | |
|         , ( "Hf", "Hafnium" ) | |
|         , ( "Hg", "Mercury" ) | |
|         , ( "Ho", "Holmium" ) | |
|         , ( "Hs", "Hassium" ) | |
|         , ( "I", "Iodine" ) | |
|         , ( "In", "Indium" ) | |
|         , ( "Ir", "Iridium" ) | |
|         , ( "K", "Potassium" ) | |
|         , ( "Kr", "Krypton" ) | |
|         , ( "La", "Lanthanum" ) | |
|         , ( "Li", "Lithium" ) | |
|         , ( "Lr", "Lawrencium" ) | |
|         , ( "Lu", "Lutetium" ) | |
|         , ( "Lv", "Livermorium" ) | |
|         , ( "Mc", "Moscovium" ) | |
|         , ( "Md", "Mendelevium" ) | |
|         , ( "Mg", "Magnesium" ) | |
|         , ( "Mn", "Manganese" ) | |
|         , ( "Mo", "Molybdenum" ) | |
|         , ( "Mt", "Meitnerium" ) | |
|         , ( "N", "Nitrogen" ) | |
|         , ( "Na", "Sodium" ) | |
|         , ( "Nb", "Niobium" ) | |
|         , ( "Nd", "Neodymium" ) | |
|         , ( "Ne", "Neon" ) | |
|         , ( "Nh", "Nihonium" ) | |
|         , ( "Ni", "Nickel" ) | |
|         , ( "No", "Nobelium" ) | |
|         , ( "Np", "Neptunium" ) | |
|         , ( "O", "Oxygen" ) | |
|         , ( "Og", "Oganesson" ) | |
|         , ( "Os", "Osmium" ) | |
|         , ( "P", "Phosphorus" ) | |
|         , ( "Pa", "Protactinium" ) | |
|         , ( "Pb", "Lead" ) | |
|         , ( "Pd", "Palladium" ) | |
|         , ( "Pm", "Promethium" ) | |
|         , ( "Po", "Polonium" ) | |
|         , ( "Pr", "Praseodymium" ) | |
|         , ( "Pt", "Platinum" ) | |
|         , ( "Pu", "Plutonium" ) | |
|         , ( "Ra", "Radium" ) | |
|         , ( "Rb", "Rubidium" ) | |
|         , ( "Re", "Rhenium" ) | |
|         , ( "Rf", "Rutherfordium" ) | |
|         , ( "Rg", "Roentgenium" ) | |
|         , ( "Rh", "Rhodium" ) | |
|         , ( "Rn", "Radon" ) | |
|         , ( "Ru", "Ruthenium" ) | |
|         , ( "S", "Sulphur" ) | |
|         , ( "Sb", "Antimony" ) | |
|         , ( "Sc", "Scandium" ) | |
|         , ( "Se", "Selenium" ) | |
|         , ( "Sg", "Seaborgium" ) | |
|         , ( "Si", "Silicon" ) | |
|         , ( "Sm", "Samarium" ) | |
|         , ( "Sn", "Tin" ) | |
|         , ( "Sr", "Strontium" ) | |
|         , ( "Ta", "Tantalum" ) | |
|         , ( "Tb", "Terbium" ) | |
|         , ( "Tc", "Technetium" ) | |
|         , ( "Te", "Tellurium" ) | |
|         , ( "Th", "Thorium" ) | |
|         , ( "Ti", "Titanium" ) | |
|         , ( "Tl", "Thallium" ) | |
|         , ( "Tm", "Thulium" ) | |
|         , ( "Ts", "Tennessine" ) | |
|         , ( "U", "Uranium" ) | |
|         -- provisional systematic symbols, retained for backward compatibility | |
|         , ( "Uuo", "Ununoctium" ) | |
|         , ( "Uup", "Ununpentium" ) | |
|         , ( "Uus", "Ununseptium" ) | |
|         , ( "Uut", "Ununtrium" ) | |
|         , ( "V", "Vanadium" ) | |
|         , ( "W", "Tungsten" ) | |
|         , ( "Xe", "Xenon" ) | |
|         , ( "Y", "Yttrium" ) | |
|         , ( "Yb", "Ytterbium" ) | |
|         , ( "Zn", "Zinc" ) | |
|         , ( "Zr", "Zirconium" ) | |
|         ] | |
| -- TYPES | |
| -- describing formula syntax | |
| -- A compound (a whole formula, or the contents of a bracketed part) | |
| -- is a list of quantified groups. | |
| type alias Compound = | |
| List QuantifiedGroup | |
| -- A group paired with an Int quantity (the parser `subscript` supplies it, | |
| -- using 1 when no digits follow the group). | |
| type alias QuantifiedGroup = | |
| ( Group, Int ) | |
| -- An element is held as a String; the parsers in this module store the | |
| -- element name produced by lookupEl (e.g. "Hydrogen"), not the symbol. | |
| type alias Element = | |
| String | |
| -- A group is either a single element (El) or a nested compound (Comp). | |
| type Group | |
| = El Element | |
| | Comp Compound | |
| -- Looks an element symbol up in the `data` dictionary: | |
| -- Just the element name when the symbol is known, Nothing otherwise. | |
| lookupEl : String -> Maybe String | |
| lookupEl elSymbol = | |
|     Dict.get elSymbol data | |
| -- PARSERS | |
| -- <$> is the applicative version of map | |
| -- so if lookupEl always succeeded, we could use <$> | |
| -- however it may fail with Maybe, so <$?> | |
| -- fails the whole parser if lookup produces Nothing | |
| -- TODO - probably a better way to do this .. | |
| -- | |
| -- AppParser.ident specifies parsers for 1st and subsequent chars | |
| -- so here we use it to ensure the first letter is a capital | |
| -- TODO - I suppose we could trim elements to ensure they are only 1 or 2 char .. | |
| -- Reads an identifier with `ident pUpper pLower` and replaces it with the | |
| -- element name found by lookupEl; <$?> fails the parser when the lookup | |
| -- gives Nothing. | |
| elname : Parser String | |
| elname = | |
|     let | |
|         rawSymbol = | |
|             ident pUpper pLower | |
|     in | |
|         lookupEl <$?> rawSymbol | |
| -- <$> lets us transform parser results, making it easy to build a type | |
| -- Parses one element via elname and wraps the resulting name in the El constructor. | |
| element : Parser Group | |
| element = El <$> elname | |
| -- orElse is actually the 'alternative' operator (<|>), so we could write: | |
| -- group = element <|> bracketedCompound | |
| -- A group is either a single element or a bracketed compound. | |
| -- `element` is given to orElse as the first alternative; the second | |
| -- alternative is wrapped in a lambda rather than naming bracketedCompound | |
| -- directly, because group and bracketedCompound refer to each other | |
| -- (through compound and quantifiedGroup). | |
| group : Parser Group | |
| group = orElse | |
| element | |
| (\s -> bracketedCompound s) -- recursion issue | |
| -- String.toInt produces a Result type, so it is applied with <$??>, | |
| -- which is similar to <$?>: it fails the whole parser when the conversion fails | |
| -- | |
| -- many1Char parses a sequence of 1 or more chars into a String | |
| -- unfortunately, elm Strings are not List Char, so we need | |
| -- separate combinators for Strings and Lists (see compound) | |
| -- Parses the quantity that may follow a group: one or more digits | |
| -- converted with String.toInt, or the value 1 when that alternative fails. | |
| subscript : Parser Int | |
| subscript = | |
|     let | |
|         explicitCount = | |
|             String.toInt <$??> many1Char pDigit | |
|         implicitOne = | |
|             pureParser 1 | |
|     in | |
|         orElse explicitCount implicitOne | |
| -- this can be written instead in canonical applicative style: | |
| -- quantifiedGroup = (,) <$> group <*> subscript | |
| -- | |
| -- which can be read as applying some 'function' to a sequence of arguments | |
| -- this makes it a very natural notation | |
| -- Parses a group followed by its subscript and combines the two results | |
| -- with the tuple constructor (,), giving a ( Group, Int ) pair. | |
| quantifiedGroup : Parser QuantifiedGroup | |
| quantifiedGroup = pureParser (,) <*> group <*> subscript | |
| -- *> and <* are used to 'throw away' intermediate results | |
| -- in the same way that elm-tools/parser uses |. and |= | |
| -- Here, we use them to consume the brackets. | |
| -- | |
| -- Again, we could write (ignoring recursion problems): | |
| -- bracketedCompound = char '(' *> (Comp <$> compound ) <* char ')' | |
| -- Parses '(' , a compound, then ')' and wraps the compound in Comp. | |
| -- The compound parser is wrapped in a lambda (marked "recursion") instead of | |
| -- being named directly. | |
| -- NOTE(review): the leading `pureParser identity <*>` looks redundant next to | |
| -- the simpler form shown in the comment above; confirm against AppParser's | |
| -- operator fixities before removing it. | |
| bracketedCompound : Parser Group | |
| bracketedCompound = pureParser identity | |
| <*> char '(' | |
| *> (Comp <$> (\s -> compound s)) -- recursion | |
| <* char ')' | |
| -- the combinators many and many1 can be used to parse sequences | |
| -- of items, producing a list | |
| -- A compound is one or more quantified groups (many1 quantifiedGroup). | |
| -- The lambda form is used in place of the commented-out direct definition; | |
| -- presumably for the same recursion issue noted on `group` - TODO confirm. | |
| compound : Parser Compound | |
| -- compound = many1 quantifiedGroup | |
| compound = (\s -> many1 quantifiedGroup s) | |
| -- some test values | |
| -- Hand-built Compound value for water: two Hydrogen and one Oxygen. | |
| h2o : Compound | |
| h2o = | |
|     List.map (\( elementName, quantity ) -> ( El elementName, quantity )) | |
|         [ ( "Hydrogen", 2 ), ( "Oxygen", 1 ) ] | |
| -- sample input strings (unparsed formulae) | |
| -- a formula containing a bracketed group with its own subscript | |
| al2so43 = "Al2(SO4)3" | |
| --Glycidoxypropyltrimethoxysilane | |
| gp3msilane = "C9H20O5Si" | |
| -- hyaluronic acid | |
| hyaluronic2 = "(C14H21NO11)2" | |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is the example Chemical Formula Parser using Applicative Parsing, written in Elm
The problem is stated here https://gist.github.com/bitterjug/389a227aa243663de401ec5f16374d8c
This gist uses the AppParser.elm module - see https://gist.github.com/byzantic/bf2922861ff548193850def6c1b68e85