byzantic · November 23, 2017 11:25 · byzantic · Nov 23, 2017
diff --git a/AppChemParser.elm b/AppChemParser.elm
 -- Cambridge Elm Meetup problem, chemical formulae,
 -- implemented using Applicative Parser Combinators
 --

 module AppChemParser exposing (..)

 import Char exposing (..)
 import Dict exposing (..)
 import Html exposing (Attribute, Html, beginnerProgram, div, input, text)
 import Html.Attributes exposing (..)
 import Html.Events exposing (onInput)
 import AppParser exposing (..)
 import String

 -- DICTIONARY
 --   for mapping Elemental symbols to Element names

 -- Lookup table from element symbol to element name (118 elements).
 -- The four elements named by IUPAC in November 2016 are reachable under
 -- their official symbols (Nh, Mc, Ts, Og). The older systematic
 -- placeholders (Uut, Uup, Uus, Uuo) are kept so that existing lookups
 -- keep returning the same values.
 data : Dict String String
 data =
    Dict.fromList
        [ ( "Ac", "Actinium" )
        , ( "Ag", "Silver" )
        , ( "Al", "Aluminium" )
        , ( "Am", "Americium" )
        , ( "Ar", "Argon" )
        , ( "As", "Arsenic" )
        , ( "At", "Astatine" )
        , ( "Au", "Gold" )
        , ( "B", "Boron" )
        , ( "Ba", "Barium" )
        , ( "Be", "Beryllium" )
        , ( "Bh", "Bohrium" )
        , ( "Bi", "Bismuth" )
        , ( "Bk", "Berkelium" )
        , ( "Br", "Bromine" )
        , ( "C", "Carbon" )
        , ( "Ca", "Calcium" )
        , ( "Cd", "Cadmium" )
        , ( "Ce", "Cerium" )
        , ( "Cf", "Californium" )
        , ( "Cl", "Chlorine" )
        , ( "Cm", "Curium" )
        , ( "Cn", "Copernicium" )
        , ( "Co", "Cobalt" )
        , ( "Cr", "Chromium" )
        , ( "Cs", "Cesium" )
        , ( "Cu", "Copper" )
        , ( "Db", "Dubnium" )
        , ( "Ds", "Darmstadtium" )
        , ( "Dy", "Dysprosium" )
        , ( "Er", "Erbium" )
        , ( "Es", "Einsteinium" )
        , ( "Eu", "Europium" )
        , ( "F", "Fluorine" )
        , ( "Fe", "Iron" )
        , ( "Fl", "Flerovium" )
        , ( "Fm", "Fermium" )
        , ( "Fr", "Francium" )
        , ( "Ga", "Gallium" )
        , ( "Gd", "Gadolinium" )
        , ( "Ge", "Germanium" )
        , ( "H", "Hydrogen" )
        , ( "He", "Helium" )
        , ( "Hf", "Hafnium" )
        , ( "Hg", "Mercury" )
        , ( "Ho", "Holmium" )
        , ( "Hs", "Hassium" )
        , ( "I", "Iodine" )
        , ( "In", "Indium" )
        , ( "Ir", "Iridium" )
        , ( "K", "Potassium" )
        , ( "Kr", "Krypton" )
        , ( "La", "Lanthanum" )
        , ( "Li", "Lithium" )
        , ( "Lr", "Lawrencium" )
        , ( "Lu", "Lutetium" )
        , ( "Lv", "Livermorium" )
        , ( "Mc", "Moscovium" )
        , ( "Md", "Mendelevium" )
        , ( "Mg", "Magnesium" )
        , ( "Mn", "Manganese" )
        , ( "Mo", "Molybdenum" )
        , ( "Mt", "Meitnerium" )
        , ( "N", "Nitrogen" )
        , ( "Na", "Sodium" )
        , ( "Nb", "Niobium" )
        , ( "Nd", "Neodymium" )
        , ( "Ne", "Neon" )
        , ( "Nh", "Nihonium" )
        , ( "Ni", "Nickel" )
        , ( "No", "Nobelium" )
        , ( "Np", "Neptunium" )
        , ( "O", "Oxygen" )
        , ( "Og", "Oganesson" )
        , ( "Os", "Osmium" )
        , ( "P", "Phosphorus" )
        , ( "Pa", "Protactinium" )
        , ( "Pb", "Lead" )
        , ( "Pd", "Palladium" )
        , ( "Pm", "Promethium" )
        , ( "Po", "Polonium" )
        , ( "Pr", "Praseodymium" )
        , ( "Pt", "Platinum" )
        , ( "Pu", "Plutonium" )
        , ( "Ra", "Radium" )
        , ( "Rb", "Rubidium" )
        , ( "Re", "Rhenium" )
        , ( "Rf", "Rutherfordium" )
        , ( "Rg", "Roentgenium" )
        , ( "Rh", "Rhodium" )
        , ( "Rn", "Radon" )
        , ( "Ru", "Ruthenium" )
        , ( "S", "Sulphur" )
        , ( "Sb", "Antimony" )
        , ( "Sc", "Scandium" )
        , ( "Se", "Selenium" )
        , ( "Sg", "Seaborgium" )
        , ( "Si", "Silicon" )
        , ( "Sm", "Samarium" )
        , ( "Sn", "Tin" )
        , ( "Sr", "Strontium" )
        , ( "Ta", "Tantalum" )
        , ( "Tb", "Terbium" )
        , ( "Tc", "Technetium" )
        , ( "Te", "Tellurium" )
        , ( "Th", "Thorium" )
        , ( "Ti", "Titanium" )
        , ( "Tl", "Thallium" )
        , ( "Tm", "Thulium" )
        , ( "Ts", "Tennessine" )
        , ( "U", "Uranium" )
        , ( "Uuo", "Ununoctium" )
        , ( "Uup", "Ununpentium" )
        , ( "Uus", "Ununseptium" )
        , ( "Uut", "Ununtrium" )
        , ( "V", "Vanadium" )
        , ( "W", "Tungsten" )
        , ( "Xe", "Xenon" )
        , ( "Y", "Yttrium" )
        , ( "Yb", "Ytterbium" )
        , ( "Zn", "Zinc" )
        , ( "Zr", "Zirconium" )
        ]

 -- TYPES
 --  describing formula syntax

 -- A compound is a list of quantified groups, in the order they appear.
 type alias Compound = List QuantifiedGroup


 -- A group paired with an Int count (the subscript parser supplies it).
 type alias QuantifiedGroup = ( Group, Int )


 -- An element is represented as a plain String.
 type alias Element = String


 -- A group is either a single element or a nested compound.
 type Group = El Element | Comp Compound


 -- Look an element symbol up in the `data` table.
 -- Produces Nothing when the symbol is not a key of the table.
 lookupEl : String -> Maybe String
 lookupEl symbol =
     Dict.get symbol data

 -- PARSERS

 -- elname parses an element symbol and yields the result of looking it up.
 --
 -- <$> is the applicative version of map, so if lookupEl always succeeded
 -- we could use <$> here. Because the lookup may produce Nothing, <$?> is
 -- used instead: it fails the whole parser when the lookup yields Nothing.
 -- TODO - there is probably a better way to do this.
 --
 -- AppParser.ident takes one parser for the first char and one for the
 -- subsequent chars; here that ensures the first letter is a capital.
 -- TODO - we could trim symbols to ensure they are only 1 or 2 chars long.

 elname : Parser String
 elname =
     let
         symbol =
             ident pUpper pLower
     in
         lookupEl <$?> symbol

 -- element wraps the result of elname in the El constructor.
 -- <$> transforms a parser's result, which makes it easy to build a type.

 element : Parser Group
 element =
     El <$> elname

 -- group parses a single element or, failing that, a bracketed compound.
 --
 -- orElse is actually the 'alternative' operator (<|>), so we could write:
 -- group = element <|> bracketedCompound
 --
 -- bracketedCompound is reached through a function rather than referenced
 -- directly, to work around the recursion issue between these parsers.

 group : Parser Group
 group =
     let
         nested input =
             bracketedCompound input
     in
         orElse element nested

 -- subscript parses the count after a group, defaulting to 1 when the
 -- digit parser does not succeed.
 --
 -- <$??>   String.toInt produces a Result type, so <$??> is similar to
 --         <$?> and fails the whole parser on a bad conversion.
 --
 --         many1Char parses a sequence of 1 or more chars into a String.
 --         Unfortunately, Elm Strings are not List Char, so we need
 --         separate combinators for Strings and Lists (see compound).

 subscript : Parser Int
 subscript =
     let
         explicitCount =
             String.toInt <$??> many1Char pDigit

         implicitOne =
             pureParser 1
     in
         orElse explicitCount implicitOne


 -- quantifiedGroup parses a group followed by its subscript and pairs them.
 --
 -- This can be written instead in canonical applicative style:
 -- quantifiedGroup = (,) <$> group <*> subscript
 --
 -- which can be read as applying some 'function' to a sequence of
 -- arguments; this makes it a very natural notation.

 quantifiedGroup : Parser QuantifiedGroup
 quantifiedGroup =
     let
         pair =
             pureParser (,)
     in
         pair <*> group <*> subscript

 -- bracketedCompound parses '(' compound ')' and wraps the compound in Comp.
 --
 -- *> and <* are used to 'throw away' intermediate results,
 -- in the same way that elm-tools/parser uses |. and |=
 -- Here, we use them to consume the brackets.
 --
 -- Again, we could write (ignoring recursion problems):
 -- bracketedCompound = char '(' *> (Comp <$> compound ) <* char ')'
 --
 -- NOTE(review): how this chain groups depends on the fixity AppParser
 -- declares for <*>, *> and <* (presumably left-associative at one
 -- precedence, as in Haskell) - confirm before restructuring.

 bracketedCompound : Parser Group
 bracketedCompound = pureParser identity
        <*> char '(' 
        -- compound is reached through a lambda because of the recursion
        *>  (Comp <$> (\s -> compound s))    -- recursion
        <*  char ')'

 -- compound parses one or more quantified groups into a list.
 --
 -- The combinators many and many1 can be used to parse sequences of items,
 -- producing a list. The direct form
 --     compound = many1 quantifiedGroup
 -- is not used; the lambda form is kept instead (presumably for the same
 -- recursion reason as in group and bracketedCompound).

 compound : Parser Compound
 compound =
     \input -> many1 quantifiedGroup input

 -- some test values

 -- Hand-built Compound value for water: two Hydrogen, one Oxygen.
 h2o : Compound
 h2o =
     [ ( El "Hydrogen", 2 ), ( El "Oxygen", 1 ) ]

 -- sample formula with a bracketed, quantified group
 al2so43 : String
 al2so43 =
     "Al2(SO4)3"


 -- Glycidoxypropyltrimethoxysilane
 gp3msilane : String
 gp3msilane =
     "C9H20O5Si"


 -- hyaluronic acid
 hyaluronic2 : String
 hyaluronic2 =
     "(C14H21NO11)2"
	-- Cambridge Elm Meetup problem, chemical formulae,
	-- implemented using Applicative Parser Combinators
	--

	module AppChemParser exposing (..)

	import Char exposing (..)
	import Dict exposing (..)
	import Html exposing (Attribute, Html, beginnerProgram, div, input, text)
	import Html.Attributes exposing (..)
	import Html.Events exposing (onInput)
	import AppParser exposing (..)
	import String

	-- DICTIONARY
	-- for mapping Elemental symbols to Element names

	-- Lookup table from element symbol to element name (118 elements).
	-- The four elements named by IUPAC in November 2016 are reachable under
	-- their official symbols (Nh, Mc, Ts, Og). The older systematic
	-- placeholders (Uut, Uup, Uus, Uuo) are kept so that existing lookups
	-- keep returning the same values.
	data : Dict String String
	data =
	    Dict.fromList
	        [ ( "Ac", "Actinium" )
	        , ( "Ag", "Silver" )
	        , ( "Al", "Aluminium" )
	        , ( "Am", "Americium" )
	        , ( "Ar", "Argon" )
	        , ( "As", "Arsenic" )
	        , ( "At", "Astatine" )
	        , ( "Au", "Gold" )
	        , ( "B", "Boron" )
	        , ( "Ba", "Barium" )
	        , ( "Be", "Beryllium" )
	        , ( "Bh", "Bohrium" )
	        , ( "Bi", "Bismuth" )
	        , ( "Bk", "Berkelium" )
	        , ( "Br", "Bromine" )
	        , ( "C", "Carbon" )
	        , ( "Ca", "Calcium" )
	        , ( "Cd", "Cadmium" )
	        , ( "Ce", "Cerium" )
	        , ( "Cf", "Californium" )
	        , ( "Cl", "Chlorine" )
	        , ( "Cm", "Curium" )
	        , ( "Cn", "Copernicium" )
	        , ( "Co", "Cobalt" )
	        , ( "Cr", "Chromium" )
	        , ( "Cs", "Cesium" )
	        , ( "Cu", "Copper" )
	        , ( "Db", "Dubnium" )
	        , ( "Ds", "Darmstadtium" )
	        , ( "Dy", "Dysprosium" )
	        , ( "Er", "Erbium" )
	        , ( "Es", "Einsteinium" )
	        , ( "Eu", "Europium" )
	        , ( "F", "Fluorine" )
	        , ( "Fe", "Iron" )
	        , ( "Fl", "Flerovium" )
	        , ( "Fm", "Fermium" )
	        , ( "Fr", "Francium" )
	        , ( "Ga", "Gallium" )
	        , ( "Gd", "Gadolinium" )
	        , ( "Ge", "Germanium" )
	        , ( "H", "Hydrogen" )
	        , ( "He", "Helium" )
	        , ( "Hf", "Hafnium" )
	        , ( "Hg", "Mercury" )
	        , ( "Ho", "Holmium" )
	        , ( "Hs", "Hassium" )
	        , ( "I", "Iodine" )
	        , ( "In", "Indium" )
	        , ( "Ir", "Iridium" )
	        , ( "K", "Potassium" )
	        , ( "Kr", "Krypton" )
	        , ( "La", "Lanthanum" )
	        , ( "Li", "Lithium" )
	        , ( "Lr", "Lawrencium" )
	        , ( "Lu", "Lutetium" )
	        , ( "Lv", "Livermorium" )
	        , ( "Mc", "Moscovium" )
	        , ( "Md", "Mendelevium" )
	        , ( "Mg", "Magnesium" )
	        , ( "Mn", "Manganese" )
	        , ( "Mo", "Molybdenum" )
	        , ( "Mt", "Meitnerium" )
	        , ( "N", "Nitrogen" )
	        , ( "Na", "Sodium" )
	        , ( "Nb", "Niobium" )
	        , ( "Nd", "Neodymium" )
	        , ( "Ne", "Neon" )
	        , ( "Nh", "Nihonium" )
	        , ( "Ni", "Nickel" )
	        , ( "No", "Nobelium" )
	        , ( "Np", "Neptunium" )
	        , ( "O", "Oxygen" )
	        , ( "Og", "Oganesson" )
	        , ( "Os", "Osmium" )
	        , ( "P", "Phosphorus" )
	        , ( "Pa", "Protactinium" )
	        , ( "Pb", "Lead" )
	        , ( "Pd", "Palladium" )
	        , ( "Pm", "Promethium" )
	        , ( "Po", "Polonium" )
	        , ( "Pr", "Praseodymium" )
	        , ( "Pt", "Platinum" )
	        , ( "Pu", "Plutonium" )
	        , ( "Ra", "Radium" )
	        , ( "Rb", "Rubidium" )
	        , ( "Re", "Rhenium" )
	        , ( "Rf", "Rutherfordium" )
	        , ( "Rg", "Roentgenium" )
	        , ( "Rh", "Rhodium" )
	        , ( "Rn", "Radon" )
	        , ( "Ru", "Ruthenium" )
	        , ( "S", "Sulphur" )
	        , ( "Sb", "Antimony" )
	        , ( "Sc", "Scandium" )
	        , ( "Se", "Selenium" )
	        , ( "Sg", "Seaborgium" )
	        , ( "Si", "Silicon" )
	        , ( "Sm", "Samarium" )
	        , ( "Sn", "Tin" )
	        , ( "Sr", "Strontium" )
	        , ( "Ta", "Tantalum" )
	        , ( "Tb", "Terbium" )
	        , ( "Tc", "Technetium" )
	        , ( "Te", "Tellurium" )
	        , ( "Th", "Thorium" )
	        , ( "Ti", "Titanium" )
	        , ( "Tl", "Thallium" )
	        , ( "Tm", "Thulium" )
	        , ( "Ts", "Tennessine" )
	        , ( "U", "Uranium" )
	        , ( "Uuo", "Ununoctium" )
	        , ( "Uup", "Ununpentium" )
	        , ( "Uus", "Ununseptium" )
	        , ( "Uut", "Ununtrium" )
	        , ( "V", "Vanadium" )
	        , ( "W", "Tungsten" )
	        , ( "Xe", "Xenon" )
	        , ( "Y", "Yttrium" )
	        , ( "Yb", "Ytterbium" )
	        , ( "Zn", "Zinc" )
	        , ( "Zr", "Zirconium" )
	        ]

	-- TYPES
	-- describing formula syntax

	-- A compound is a list of quantified groups, in the order they appear.
	type alias Compound =
	    List QuantifiedGroup


	-- A group paired with an Int count (the subscript parser supplies it).
	type alias QuantifiedGroup =
	    ( Group, Int )


	-- An element is represented as a plain String.
	type alias Element =
	    String


	-- A group is either a single element or a nested compound.
	type Group
	    = El Element
	    | Comp Compound


	-- Look an element symbol up in the `data` table.
	-- Produces Nothing when the symbol is not a key of the table.
	lookupEl : String -> Maybe String
	lookupEl symbol =
	    Dict.get symbol data

	-- PARSERS

	-- elname parses an element symbol and yields the result of looking it up.
	--
	-- <$> is the applicative version of map, so if lookupEl always succeeded
	-- we could use <$> here. Because the lookup may produce Nothing, <$?> is
	-- used instead: it fails the whole parser when the lookup yields Nothing.
	-- TODO - there is probably a better way to do this.
	--
	-- AppParser.ident takes one parser for the first char and one for the
	-- subsequent chars; here that ensures the first letter is a capital.
	-- TODO - we could trim symbols to ensure they are only 1 or 2 chars long.

	elname : Parser String
	elname =
	    let
	        symbol =
	            ident pUpper pLower
	    in
	        lookupEl <$?> symbol

	-- element wraps the result of elname in the El constructor.
	-- <$> transforms a parser's result, which makes it easy to build a type.

	element : Parser Group
	element =
	    El <$> elname

	-- group parses a single element or, failing that, a bracketed compound.
	--
	-- orElse is actually the 'alternative' operator (<|>), so we could write:
	-- group = element <|> bracketedCompound

	group : Parser Group
	group =
	    orElse
	        element
	        (\s -> bracketedCompound s)  -- recursion issue

	-- subscript parses the count after a group, defaulting to 1 when the
	-- digit parser does not succeed.
	--
	-- <$??>   String.toInt produces a Result type, so <$??> is similar to
	--         <$?> and fails the whole parser on a bad conversion.
	--
	--         many1Char parses a sequence of 1 or more chars into a String.
	--         Unfortunately, Elm Strings are not List Char, so we need
	--         separate combinators for Strings and Lists (see compound).

	subscript : Parser Int
	subscript =
	    let
	        explicitCount =
	            String.toInt <$??> many1Char pDigit

	        implicitOne =
	            pureParser 1
	    in
	        orElse explicitCount implicitOne


	-- quantifiedGroup parses a group followed by its subscript and pairs them.
	--
	-- This can be written instead in canonical applicative style:
	-- quantifiedGroup = (,) <$> group <*> subscript
	--
	-- which can be read as applying some 'function' to a sequence of
	-- arguments; this makes it a very natural notation.

	quantifiedGroup : Parser QuantifiedGroup
	quantifiedGroup = pureParser (,) <*> group <*> subscript

	-- bracketedCompound parses '(' compound ')' and wraps the compound in Comp.
	--
	-- *> and <* are used to 'throw away' intermediate results,
	-- in the same way that elm-tools/parser uses |. and |=
	-- Here, we use them to consume the brackets.
	--
	-- Again, we could write (ignoring recursion problems):
	-- bracketedCompound = char '(' *> (Comp <$> compound ) <* char ')'
	--
	-- NOTE(review): how this chain groups depends on the fixity AppParser
	-- declares for <*>, *> and <* (presumably left-associative at one
	-- precedence, as in Haskell) - confirm before restructuring.

	bracketedCompound : Parser Group
	bracketedCompound = pureParser identity
	        <*> char '('
	        *>  (Comp <$> (\s -> compound s))    -- recursion
	        <*  char ')'

	-- compound parses one or more quantified groups into a list.
	--
	-- The combinators many and many1 can be used to parse sequences of items,
	-- producing a list. The direct form
	--     compound = many1 quantifiedGroup
	-- is not used; the lambda form is kept instead (presumably for the same
	-- recursion reason as in group and bracketedCompound).

	compound : Parser Compound
	compound =
	    \input -> many1 quantifiedGroup input

	-- some test values

	-- Hand-built Compound value for water: two Hydrogen, one Oxygen.
	h2o : Compound
	h2o =
	    [ ( El "Hydrogen", 2 )
	    , ( El "Oxygen", 1 )
	    ]

	-- sample formula with a bracketed, quantified group
	al2so43 : String
	al2so43 =
	    "Al2(SO4)3"


	-- Glycidoxypropyltrimethoxysilane
	gp3msilane : String
	gp3msilane =
	    "C9H20O5Si"


	-- hyaluronic acid
	hyaluronic2 : String
	hyaluronic2 =
	    "(C14H21NO11)2"
No results found