Skip to content

Instantly share code, notes, and snippets.

@byzantic
Created November 23, 2017 11:25
Show Gist options
  • Select an option

  • Save byzantic/e6f821b0b6683d9bb34ff0d5c761b4e1 to your computer and use it in GitHub Desktop.

Select an option

Save byzantic/e6f821b0b6683d9bb34ff0d5c761b4e1 to your computer and use it in GitHub Desktop.
Chemical Parser using Elm Applicative Parser
-- Cambridge Elm Meetup problem, chemical formulae,
-- implemented using Applicative Parser Combinators
--
module AppChemParser exposing (..)
import Char exposing (..)
import Dict exposing (..)
import Html exposing (Attribute, Html, beginnerProgram, div, input, text)
import Html.Attributes exposing (..)
import Html.Events exposing (onInput)
import AppParser exposing (..)
import String
-- DICTIONARY
-- Maps element symbols (e.g. "Fe") to element names (e.g. "Iron").
--
-- Elements 113, 115, 117 and 118 are listed under both their temporary
-- systematic symbols (Uut, Uup, Uus, Uuo) and the permanent symbols
-- approved by IUPAC in 2016 (Nh, Mc, Ts, Og), so that formulae written
-- with either convention can be looked up.
data : Dict String String
data =
    Dict.fromList
        [ ( "Ac", "Actinium" )
        , ( "Ag", "Silver" )
        , ( "Al", "Aluminium" )
        , ( "Am", "Americium" )
        , ( "Ar", "Argon" )
        , ( "As", "Arsenic" )
        , ( "At", "Astatine" )
        , ( "Au", "Gold" )
        , ( "B", "Boron" )
        , ( "Ba", "Barium" )
        , ( "Be", "Beryllium" )
        , ( "Bh", "Bohrium" )
        , ( "Bi", "Bismuth" )
        , ( "Bk", "Berkelium" )
        , ( "Br", "Bromine" )
        , ( "C", "Carbon" )
        , ( "Ca", "Calcium" )
        , ( "Cd", "Cadmium" )
        , ( "Ce", "Cerium" )
        , ( "Cf", "Californium" )
        , ( "Cl", "Chlorine" )
        , ( "Cm", "Curium" )
        , ( "Cn", "Copernicium" )
        , ( "Co", "Cobalt" )
        , ( "Cr", "Chromium" )
        , ( "Cs", "Cesium" )
        , ( "Cu", "Copper" )
        , ( "Db", "Dubnium" )
        , ( "Ds", "Darmstadtium" )
        , ( "Dy", "Dysprosium" )
        , ( "Er", "Erbium" )
        , ( "Es", "Einsteinium" )
        , ( "Eu", "Europium" )
        , ( "F", "Fluorine" )
        , ( "Fe", "Iron" )
        , ( "Fl", "Flerovium" )
        , ( "Fm", "Fermium" )
        , ( "Fr", "Francium" )
        , ( "Ga", "Gallium" )
        , ( "Gd", "Gadolinium" )
        , ( "Ge", "Germanium" )
        , ( "H", "Hydrogen" )
        , ( "He", "Helium" )
        , ( "Hf", "Hafnium" )
        , ( "Hg", "Mercury" )
        , ( "Ho", "Holmium" )
        , ( "Hs", "Hassium" )
        , ( "I", "Iodine" )
        , ( "In", "Indium" )
        , ( "Ir", "Iridium" )
        , ( "K", "Potassium" )
        , ( "Kr", "Krypton" )
        , ( "La", "Lanthanum" )
        , ( "Li", "Lithium" )
        , ( "Lr", "Lawrencium" )
        , ( "Lu", "Lutetium" )
        , ( "Lv", "Livermorium" )
        , ( "Mc", "Moscovium" )
        , ( "Md", "Mendelevium" )
        , ( "Mg", "Magnesium" )
        , ( "Mn", "Manganese" )
        , ( "Mo", "Molybdenum" )
        , ( "Mt", "Meitnerium" )
        , ( "N", "Nitrogen" )
        , ( "Na", "Sodium" )
        , ( "Nb", "Niobium" )
        , ( "Nd", "Neodymium" )
        , ( "Ne", "Neon" )
        , ( "Nh", "Nihonium" )
        , ( "Ni", "Nickel" )
        , ( "No", "Nobelium" )
        , ( "Np", "Neptunium" )
        , ( "O", "Oxygen" )
        , ( "Og", "Oganesson" )
        , ( "Os", "Osmium" )
        , ( "P", "Phosphorus" )
        , ( "Pa", "Protactinium" )
        , ( "Pb", "Lead" )
        , ( "Pd", "Palladium" )
        , ( "Pm", "Promethium" )
        , ( "Po", "Polonium" )
        , ( "Pr", "Praseodymium" )
        , ( "Pt", "Platinum" )
        , ( "Pu", "Plutonium" )
        , ( "Ra", "Radium" )
        , ( "Rb", "Rubidium" )
        , ( "Re", "Rhenium" )
        , ( "Rf", "Rutherfordium" )
        , ( "Rg", "Roentgenium" )
        , ( "Rh", "Rhodium" )
        , ( "Rn", "Radon" )
        , ( "Ru", "Ruthenium" )
        , ( "S", "Sulphur" )
        , ( "Sb", "Antimony" )
        , ( "Sc", "Scandium" )
        , ( "Se", "Selenium" )
        , ( "Sg", "Seaborgium" )
        , ( "Si", "Silicon" )
        , ( "Sm", "Samarium" )
        , ( "Sn", "Tin" )
        , ( "Sr", "Strontium" )
        , ( "Ta", "Tantalum" )
        , ( "Tb", "Terbium" )
        , ( "Tc", "Technetium" )
        , ( "Te", "Tellurium" )
        , ( "Th", "Thorium" )
        , ( "Ti", "Titanium" )
        , ( "Tl", "Thallium" )
        , ( "Tm", "Thulium" )
        , ( "Ts", "Tennessine" )
        , ( "U", "Uranium" )
        , ( "Uuo", "Ununoctium" )
        , ( "Uup", "Ununpentium" )
        , ( "Uus", "Ununseptium" )
        , ( "Uut", "Ununtrium" )
        , ( "V", "Vanadium" )
        , ( "W", "Tungsten" )
        , ( "Xe", "Xenon" )
        , ( "Y", "Yttrium" )
        , ( "Yb", "Ytterbium" )
        , ( "Zn", "Zinc" )
        , ( "Zr", "Zirconium" )
        ]
-- TYPES
-- describing formula syntax
--
-- The declarations refer to one another (a Group may contain a Compound,
-- which is a list of quantified Groups), so they are listed bottom-up.


-- An element, held as a plain String.
type alias Element =
    String


-- One unit of a formula: either a single element or a nested compound.
type Group
    = El Element
    | Comp Compound


-- A group paired with an Int count.
type alias QuantifiedGroup =
    ( Group, Int )


-- A whole formula: a list of quantified groups.
type alias Compound =
    List QuantifiedGroup
-- Look up an element symbol in the `data` dictionary.
-- Gives `Just name` for a known symbol and `Nothing` otherwise.
lookupEl : String -> Maybe String
lookupEl symbol =
    Dict.get symbol data
-- PARSERS
-- elname parses an element symbol and yields the result of looking it
-- up with lookupEl.
--
-- <$> is the applicative version of map, and would be enough if lookupEl
-- always succeeded. Because lookupEl returns a Maybe, <$?> is used
-- instead: it fails the whole parser if the lookup produces Nothing.
-- TODO - probably a better way to do this ..
--
-- AppParser.ident takes separate parsers for the 1st and the subsequent
-- chars, so it is used here to ensure the first letter is a capital.
-- TODO - I suppose we could trim elements to ensure they are only 1 or 2 char ..
elname : Parser String
elname =
    let
        symbol =
            ident pUpper pLower
    in
        lookupEl <$?> symbol
-- <$> lets us transform parser results, making it easy to build a type:
-- here the String produced by elname is wrapped in the El constructor,
-- giving a parser of Group.
element : Parser Group
element = El <$> elname
-- A group is either a single element or a bracketed compound.
--
-- orElse is actually the 'alternative' operator (<|>), so we could write:
-- group = element <|> bracketedCompound
--
-- bracketedCompound is wrapped in a lambda instead of being referenced
-- directly because of the recursion issue: group, bracketedCompound,
-- compound and quantifiedGroup are defined in terms of one another.
group : Parser Group
group =
    orElse element (\input -> bracketedCompound input)
-- The count that follows a group: one or more digits converted with
-- String.toInt, or else pureParser 1.
--
-- String.toInt produces a Result type, so <$??> is used; it is similar
-- to <$?> and fails the whole parser.
--
-- many1Char parses a sequence of 1 or more chars into a String.
-- Unfortunately, elm Strings are not List Char, so we need separate
-- combinators for Strings and Lists (see compound).
subscript : Parser Int
subscript =
    let
        explicitCount =
            String.toInt <$??> many1Char pDigit
    in
        orElse explicitCount (pureParser 1)
-- Pairs the result of group with the result of subscript.
--
-- This can be written instead in canonical applicative style:
-- quantifiedGroup = (,) <$> group <*> subscript
--
-- which can be read as applying some 'function' to a sequence of
-- arguments; this makes it a very natural notation.
-- The pairing function is spelled out as a lambda here; it builds the
-- same tuple as (,).
quantifiedGroup : Parser QuantifiedGroup
quantifiedGroup =
    pureParser (\parsedGroup count -> ( parsedGroup, count ))
        <*> group
        <*> subscript
-- A compound enclosed in round brackets, wrapped in the Comp constructor.
--
-- *> and <* are used to 'throw away' intermediate results, in the same
-- way that elm-tools/parser uses |. and |= ; here they consume the
-- brackets.
--
-- Again, we could write (ignoring recursion problems):
-- bracketedCompound = char '(' *> (Comp <$> compound ) <* char ')'
--
-- compound is wrapped in a lambda rather than referenced directly
-- because of that recursion: compound is itself defined, via
-- quantifiedGroup and group, in terms of bracketedCompound.
bracketedCompound : Parser Group
bracketedCompound =
    let
        inner =
            Comp <$> (\input -> compound input)
    in
        pureParser identity
            <*> char '('
            *> inner
            <* char ')'
-- The combinators many and many1 can be used to parse sequences of
-- items, producing a list; a compound is many1 quantifiedGroup.
--
-- The point-free form
-- compound = many1 quantifiedGroup
-- is not used: the input is taken as an explicit argument so that
-- quantifiedGroup is only referenced once compound is applied.
compound : Parser Compound
compound input =
    many1 quantifiedGroup input
-- some test values
-- Hand-built Compound value: Hydrogen with count 2, Oxygen with count 1.
h2o : Compound
h2o =
    let
        hydrogen =
            ( El "Hydrogen", 2 )

        oxygen =
            ( El "Oxygen", 1 )
    in
        [ hydrogen, oxygen ]
-- Sample formula strings (unparsed input text).
al2so43 : String
al2so43 =
    "Al2(SO4)3"


-- Glycidoxypropyltrimethoxysilane
gp3msilane : String
gp3msilane =
    "C9H20O5Si"


-- hyaluronic acid
hyaluronic2 : String
hyaluronic2 =
    "(C14H21NO11)2"
@byzantic
Copy link
Author

This is the example Chemical Formula Parser using Applicative Parsing, written in Elm
The problem is stated here https://gist.github.com/bitterjug/389a227aa243663de401ec5f16374d8c

This gist uses the AppParser.elm module - see https://gist.github.com/byzantic/bf2922861ff548193850def6c1b68e85

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment