Skip to content

Instantly share code, notes, and snippets.

@sma
Created December 8, 2018 19:59
Show Gist options
  • Select an option

  • Save sma/f7ae4b9e28c1107eacd9cf3c5d54a285 to your computer and use it in GitHub Desktop.

Select an option

Save sma/f7ae4b9e28c1107eacd9cf3c5d54a285 to your computer and use it in GitHub Desktop.
A tiny XML parser written in Dart
/// Parses [input] as an XML document and reports its parts, in document
/// order, through the given callbacks.
///
/// * [onStartTag] is called with the tag name and a map of its attributes.
///   Attribute values have their surrounding quotes removed and their
///   entities replaced.
/// * [onEndTag] is called with the tag name. An empty-element tag such as
///   `<a/>` triggers [onStartTag] immediately followed by [onEndTag].
/// * [onText] is called with character data after entity replacement. The
///   content of a `<![CDATA[...]]>` section is passed through verbatim.
///
/// Tokens that carry none of the capture groups handled below (processing
/// instructions and comments) are skipped silently.
///
/// It assumes that the document is well-formed; nothing is validated.
/// DTDs are not supported. A callback that is `null` is not invoked.
void parseXml(
  String input, {
  @required void Function(String name, Map<String, String> attributes) onStartTag,
  @required void Function(String name) onEndTag,
  @required void Function(String text) onText,
}) {
  for (Match m in _tags.allMatches(input)) {
    if (m.group(1) != null) {
      // End tag: `</name>`.
      if (onEndTag != null) onEndTag(m.group(1));
    } else if (m.group(2) != null) {
      // Start tag: group 3 holds the raw, still unparsed attribute list.
      final Map<String, String> attributes = {};
      for (Match m2 in _attributes.allMatches(m.group(3))) {
        final name = m2.group(1);
        final value = m2.group(2);
        // The captured value still includes its quotes; strip them before
        // decoding entities.
        attributes[name] = _replaceEntities(value.substring(1, value.length - 1));
      }
      if (onStartTag != null) onStartTag(m.group(2), attributes);
      if (m.group(4) != null) {
        // Empty-element tag: close it right away.
        if (onEndTag != null) onEndTag(m.group(2));
      }
    } else if (m.group(5) != null) {
      // CDATA content is literal text; entities must not be decoded here.
      if (onText != null) onText(m.group(5));
    } else if (m.group(6) != null) {
      if (onText != null) onText(_replaceEntities(m.group(6)));
    }
  }
}
/// Returns [input] with every reference matched by [_entities] decoded.
///
/// Hexadecimal (`&#x...;`) and decimal (`&#...;`) references are turned into
/// the character with that code; the named references `lt`, `gt`, `amp`,
/// `quot` and `apos` are turned into the character they stand for.
String _replaceEntities(String input) {
  const named = {
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'quot': '"',
    'apos': "'",
  };
  return input.replaceAllMapped(_entities, (match) {
    final hex = match.group(1);
    if (hex != null) {
      return String.fromCharCode(int.parse(hex, radix: 16));
    }
    final decimal = match.group(2);
    if (decimal != null) {
      return String.fromCharCode(int.parse(decimal));
    }
    return named[match.group(3)];
  });
}
// Tokenizer for the XML input. The alternatives are tried in the order
// written below.
// group 1 = end tag name, 2 = start tag name, 3 = raw attribute list,
// 4 = `/` of an empty-element tag, 5 = CDATA content, 6 = text
// Processing instructions and comments are matched without a capture group.
final _tags = RegExp(
    r'</([-.:\w]+)\s*>|'
    r'<([-.:\w]+)'
    r'''((?:\s+[-.:\w]+\s*=\s*(?:"[^"]*"|'[^']*'))*)'''
    r'\s*(/)?>|'
    r'<!\[CDATA\[([\s\S]*?)]]>|'
    r'<\?[\s\S]*?\?>|'
    // No character is required before `-->`, so the empty comment `<!---->`
    // is recognised as a comment instead of leaking out as text.
    r'<!--[\s\S]*?-->|'
    // A run of text, or a single leftover character such as a stray `<`.
    // There is deliberately no trailing empty alternative: it would only add
    // a zero-length match at the end of the input.
    r'([^<]+|.)',
    multiLine: true);
// Matches a single `name = value` attribute inside a start tag.
// group 1 = attribute name, 2 = value including its surrounding quotes
final _attributes = RegExp(
    r'([-.:\w]+)' // attribute name
    r'\s*=\s*' // equals sign, optionally surrounded by whitespace
    r'''("[^"]*"|'[^']*')''', // double- or single-quoted value
    multiLine: true);
// Matches one character or entity reference.
// group 1 = digits of a hex reference, 2 = digits of a decimal reference,
// 3 = name of one of the five named entities
final _entities = RegExp(
    r'&#x([0-9a-fA-F]+);|' // hexadecimal character reference
    r'&#([0-9]+);|' // decimal character reference
    r'&(lt|gt|amp|quot|apos);'); // named entity
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment