Created
September 9, 2017 13:35
-
-
Save oceanborn2/4af875cb33fe7197b8508a451d2b7b2b to your computer and use it in GitHub Desktop.
xls data parsing with example CSV parsing commented out + XLS parsing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! perl.exe | |
| use strict; | |
| use warnings FATAL => 'all'; | |
| use Encode; | |
| use Cwd; | |
| use utf8; | |
| use Spreadsheet::ParseExcel; | |
| sub getVal { | |
| my $cell = shift; | |
| return "" unless defined($cell); | |
| return $cell->value(); | |
| } | |
| sub readExcel { | |
| my $fname = shift; | |
| my $parser = Spreadsheet::ParseExcel->new(); | |
| my $workbook = $parser->parse($fname); | |
| if (!defined $workbook) { | |
| die $parser->error(), ".\n"; | |
| } | |
| my @rows; | |
| my $worksheet = $workbook->worksheet(0); | |
| #for my $worksheet ($workbook->worksheets()) { | |
| my ( $row_min, $row_max ) = $worksheet->row_range(); | |
| for my $row ($row_min .. $row_max) { | |
| next if ($row == 0); # ignoring header | |
| my %vals; | |
| $vals{Key} = getVal($worksheet->get_cell($row, 0)); | |
| $vals{Action} = getVal($worksheet->get_cell($row, 1)); | |
| $vals{LG} = getVal($worksheet->get_cell($row, 2)); | |
| $vals{Case} = getVal($worksheet->get_cell($row, 3)); | |
| $vals{PropKey} = getVal($worksheet->get_cell($row, 4)); | |
| $vals{"en-US"} = getVal($worksheet->get_cell($row, 5)); | |
| $vals{"fr-FR"} = getVal($worksheet->get_cell($row, 6)); | |
| $vals{PrevFR} = getVal($worksheet->get_cell($row, 7)); | |
| $vals{Alt} = getVal($worksheet->get_cell($row, 8)); | |
| $vals{Comment} = getVal($worksheet->get_cell($row, 9)); | |
| push(@rows, \%vals); | |
| } | |
| #} | |
| @rows; | |
| } | |
| sub writeJSON { | |
| my $fname = shift; | |
| open(my $fh, "> $fname") or die "err: $!"; | |
| binmode $fh, ":utf8"; | |
| my $lines = shift; | |
| my @res = map { | |
| my $hr = $_; | |
| eval { | |
| my $k = $hr->{Key}; | |
| chomp($k); | |
| my $v = $hr->{"fr-FR"}; | |
| chomp($v); | |
| while ($v =~ m/\r/og) {$v =~ s/\r//og;} | |
| while ($v =~ m/\n/og) {$v =~ s/\n//og;} | |
| while ($k =~ m/"/og) {$k =~ s/"//og;} | |
| while ($v =~ m/"/og) {$v =~ s/"//og;} | |
| $v = "" unless (defined($v)); | |
| $k = "" unless (defined($k)); | |
| $_ = "\t\t\"$k\" : \"$v\""; | |
| }; | |
| my $res = $_; | |
| if ($@) { | |
| local $_ = $@; | |
| print STDERR $@ . "\n"; | |
| } | |
| else { | |
| $res; | |
| } | |
| } @$lines; | |
| print $fh "{\n"; | |
| print $fh join(",\n", @res); | |
| print $fh "\n\n}\n"; | |
| close($fh); | |
| } | |
| my $currdir = getcwd; | |
| my $infname = "${currdir}/CDNContent.xls"; | |
| my $outfname = "${currdir}/fr-FR.json"; | |
| my $outfnameAdd = "${currdir}/fr-FR_extra.json"; | |
| #my $csv = Text::CSV::Encoded->new ({ | |
| # encoding_in => 'windows-1252', | |
| # encoding_out => 'utf-8', | |
| # sep_char => ';', | |
| # quote_char => '"', | |
| # binary => 0, | |
| #}); | |
| # | |
| ## read the input file into an array of hashes | |
| #open (my $fin, $infname) or die "err:$!"; | |
| #my @hfields = $csv->getline($fin); | |
| #$csv->column_names(@hfields); | |
| #my @lines; | |
| #while (my $hr = $csv->getline_hr($fin)) { | |
| # push(@lines, $hr); | |
| #} | |
| my @lines = readExcel($infname); | |
| my @selok = grep {my $hr = $_; | |
| my $act = $hr->{Action}; | |
| $hr if ($act =~ m/[NUGMK]+/og); | |
| } @lines; | |
| my @seloth = grep {my $hr = $_; | |
| my $act = $hr->{Action}; | |
| $hr if ($act !~ m/[NUGMK]+/og); | |
| } @lines; | |
| print "selok: " . scalar @selok . "\r\n"; | |
| print "seloth: " . scalar @seloth . "\r\n"; | |
| writeJSON($outfname, \@selok); | |
| writeJSON($outfnameAdd, \@seloth); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment