Created
February 29, 2016 12:02
-
-
Save tkurita/a22ca1c68c49f3efc633 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Parse an (X)HTML file with HTML::TreeBuilder while keeping comments,
# processing instructions, entities and whitespace as they were stored by the
# parser, then print the re-serialized document to STDOUT.
#
# Usage: script.pl [FILE]
#   FILE  optional input path; defaults to history/2016/index-light.xhtml
use strict;
use warnings;
use HTML::TreeBuilder;
use HTML::Element;

# An optional first command-line argument overrides the default input path.
my $src = @ARGV ? $ARGV[0] : 'history/2016/index-light.xhtml';

my $tree = HTML::TreeBuilder->new;
$tree->store_comments(1);
$tree->store_pis(1);
$tree->no_expand_entities(1);
$tree->ignore_unknown(0);
$tree->ignore_ignorable_whitespace(0);
$tree->no_space_compacting(1);
# Required to keep the order of processing instructions and comments that sit
# directly under the html tag.
$tree->implicit_tags(0);

# parse_file returns undef (with $! set) when the file cannot be opened, so
# fail loudly instead of silently printing nothing.
defined $tree->parse_file($src)
    or die "Cannot parse '$src': $!\n";
# NOTE(review): parse_file is documented to finish the parse on its own; the
# explicit eof is kept from the original as a harmless safeguard -- confirm.
$tree->eof();

# With implicit_tags disabled, guts() can yield several top-level items, each
# either an element object or a plain text string.
foreach my $node ($tree->guts()) {
    # The second argument (indent string) is required to get indented,
    # line-broken output. The empty hash ref as third argument declares that
    # no end tag is optional, so every end tag is emitted -- without having to
    # clobber the global %HTML::Element::optionalEndTag.
    print ref $node ? $node->as_HTML('', ' ', {}) : $node;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment