Created
December 8, 2018 19:59
-
-
Save sma/f7ae4b9e28c1107eacd9cf3c5d54a285 to your computer and use it in GitHub Desktop.
A tiny XML parser written in Dart
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Parses [input] as an XML document and reports what it finds through
/// the given callbacks, in document order.
///
/// * [onStartTag] receives the tag name and its attributes; attribute
///   values have their quotes stripped and their entities replaced.
/// * [onEndTag] receives the tag name. It is also called right after
///   [onStartTag] for an empty-element tag such as `<br/>`.
/// * [onText] receives character data with entities replaced, and the
///   content of CDATA sections verbatim.
///
/// A callback that is `null` is simply not invoked.
///
/// It assumes that the document is well-formed; no errors are reported.
/// Comments and processing instructions produce no callback.
/// DTDs are not supported.
void parseXml(
  String input, {
  @required void Function(String name, Map<String, String> attributes) onStartTag,
  @required void Function(String name) onEndTag,
  @required void Function(String text) onText,
}) {
  for (final Match m in _tags.allMatches(input)) {
    final endName = m.group(1);
    final startName = m.group(2);
    final cdata = m.group(5);
    final text = m.group(6);

    if (endName != null) {
      if (onEndTag != null) onEndTag(endName);
    } else if (startName != null) {
      final attributes = <String, String>{};
      // Group 3 holds the raw attribute list of the start tag.
      for (final Match a in _attributes.allMatches(m.group(3))) {
        final quoted = a.group(2);
        // Drop the surrounding quote characters before decoding entities.
        attributes[a.group(1)] =
            _replaceEntities(quoted.substring(1, quoted.length - 1));
      }
      if (onStartTag != null) onStartTag(startName, attributes);
      // Group 4 is the "/" of an empty-element tag: close it immediately.
      if (m.group(4) != null && onEndTag != null) onEndTag(startName);
    } else if (cdata != null) {
      // CDATA content is passed through without entity replacement.
      if (onText != null) onText(cdata);
    } else if (text != null) {
      if (onText != null) onText(_replaceEntities(text));
    }
  }
}
/// Returns [input] with every `&...;` entity recognized by [_entities]
/// replaced by the character it stands for.
///
/// Handles hexadecimal (`&#x..;`) and decimal (`&#..;`) character
/// references as well as the named entities `lt`, `gt`, `amp`, `quot`
/// and `apos`.
String _replaceEntities(String input) {
  const named = <String, String>{
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'quot': '"',
    'apos': "'",
  };

  String decode(Match match) {
    final hex = match.group(1);
    if (hex != null) {
      return String.fromCharCode(int.parse(hex, radix: 16));
    }
    final dec = match.group(2);
    if (dec != null) {
      return String.fromCharCode(int.parse(dec));
    }
    // Neither numeric form matched, so group 3 carries the entity name.
    return named[match.group(3)];
  }

  return input.replaceAllMapped(_entities, decode);
}
// Tokenizer for XML input; the alternatives are tried in the order listed.
//
// group 1 = end tag name, 2 = start tag name, 3 = raw attribute list,
// 4 = "/" of an empty-element tag, 5 = CDATA content, 6 = text
//
// Processing instructions and comments match without a capture group.
// The comment alternative accepts the empty comment `<!---->` explicitly,
// because the general form requires a non-dash character before `-->`.
// The last alternative falls back to a single character so that a `<`
// which starts none of the other constructs is still consumed as text.
final _tags = RegExp(
    r'</([-.:\w]+)\s*>|'
    r'<([-.:\w]+)'
    r'''((?:\s+[-.:\w]+\s*=\s*(?:"[^"]*"|'[^']*'))*)'''
    r'\s*(/)?>|'
    r'<!\[CDATA\[([\s\S]*?)]]>|'
    r'<\?[\s\S]*?\?>|'
    r'<!--(?:-->|[\s\S]*?[^-]-->)|'
    r'([^<]+|.)');
// Matches one `name = "value"` / `name = 'value'` pair inside a tag.
// group 1 = attribute name, 2 = value including its surrounding quotes
final _attributes = RegExp(
  r'''([-.:\w]+)\s*=\s*("[^"]*"|'[^']*')''',
  multiLine: true,
);
// Matches a single character or entity reference.
// group 1 = hex digits of `&#x..;`, 2 = decimal digits of `&#..;`,
// 3 = name of one of the five predefined entities
final _entities = RegExp(
  r'&#x([0-9a-fA-F]+);|'
  r'&#([0-9]+);|'
  r'&(lt|gt|amp|quot|apos);',
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment