Created
November 23, 2010 17:52
-
-
Save jsanti/712183 to your computer and use it in GitHub Desktop.
Revisions
-
jsanti created this gist
Nov 23, 2010 .There are no files selected for viewing
#!/usr/bin/perl
#
# Repairing broken documents that mix UTF-8 and ISO-8859-1
# http://plasmasturm.org/log/416/
#
# Usage: repair [FILE ...] > repaired.txt
#
# Reads raw octets line by line from the named files (or from STDIN when no
# file is given, or when a file name is "-") and writes clean UTF-8 to
# STDOUT. Octet runs that are valid UTF-8 are decoded as UTF-8; any octet
# that is not part of a valid UTF-8 sequence is interpreted as ISO-8859-1.
#
use strict;
use warnings;

use Encode qw( decode FB_QUIET );

# Decode a string of raw octets that mixes UTF-8 and ISO-8859-1.
#
# Takes:   $octets - a byte string (the caller's copy is not modified).
# Returns: the decoded character string.
sub repair_mixed_encoding {
    my ($octets) = @_;

    my $text = '';
    while ( length $octets ) {

        # With FB_QUIET, decode() stops quietly at the first malformed
        # sequence and removes the part it consumed from $octets in place,
        # leaving only the unprocessed remainder.
        $text .= decode( 'utf-8', $octets, FB_QUIET );

        # Anything left now starts with an octet that is not valid UTF-8.
        # Remove exactly one octet (4-arg substr) and decode it as Latin-1,
        # in which every octet is valid; this guarantees forward progress.
        $text .= decode( 'iso-8859-1', substr( $octets, 0, 1, '' ) )
            if length $octets;
    }

    return $text;
}

# Repair every line readable from the filehandle $in and print the result to
# the currently selected output handle. The handle is switched to byte
# semantics first so that no input layer pre-decodes the data.
sub repair_stream {
    my ($in) = @_;

    binmode $in, ':bytes';
    while ( my $line = <$in> ) {
        print repair_mixed_encoding($line);
    }

    return;
}

# Script entry point: process each source named on the command line, or
# STDIN when none is given.
sub main {
    binmode STDOUT, ':encoding(UTF-8)';

    my @sources = @ARGV ? @ARGV : ('-');

    SOURCE:
    for my $source (@sources) {
        if ( $source eq '-' ) {
            repair_stream( \*STDIN );
            next SOURCE;
        }

        # Three-argument open: the name is always treated as a plain file,
        # never as a mode or a pipe. As with the diamond operator, a file
        # that cannot be opened is reported and skipped.
        my $fh;
        if ( !open $fh, '<', $source ) {
            warn "Can't open $source: $!\n";
            next SOURCE;
        }
        repair_stream($fh);
        close $fh;
    }

    # Buffered write errors only surface when the handle is closed.
    close STDOUT or die "Error writing output: $!\n";

    return;
}

main() unless caller;