Last active
November 22, 2024 09:56
-
-
Save olksdr/58a32d735b9bcaf86745 to your computer and use it in GitHub Desktop.
A simple ad-hoc script to parse pmta accounting files to recover bounces.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env perl | |
| use feature qw(say); | |
| use strict; | |
| use warnings; | |
| use utf8; | |
| use 5.16.0; | |
| use IO::Handle; | |
| use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error); | |
| use POSIX qw(strftime mktime); | |
| use Date::Parse; | |
| use Text::ParseWords; | |
| use File::Copy; | |
| use File::Path qw(make_path); | |
| use Cwd; | |
# main($date_to_parse [, $basedir])
#
# Walks every directory entry below $basedir whose name contains "e3uspmta",
# picks the bzip2-compressed accounting files whose name contains
# "acct-<date>", and writes the matching bounce records (type 'b', sender
# matching the e3us pattern, queued inside the hard-coded time window) to
#     <cwd>/<date>/<pmta>/e3us-<pmta>-<file name without .bz2>
#
# Parameters:
#   $date_to_parse - the date string (e.g. "2014-10-05"), either as a scalar
#                    reference (the historical calling convention) or as a
#                    plain string.
#   $basedir       - optional; directory holding the per-PMTA accounting
#                    directories. Defaults to /mnt/data/archive/upstream/acct/.
#
# Dies when a directory cannot be opened, an output file cannot be created or
# closed, or a file cannot be decompressed.
sub main {
    my ($date_to_parse, $basedir) = @_;

    # Accept both main(\$date) and main($date).
    my $date = ref $date_to_parse ? $$date_to_parse : $date_to_parse;
    die "main() needs a date to parse\n"
        unless defined $date && length $date;

    $basedir //= '/mnt/data/archive/upstream/acct/';
    $basedir .= '/' unless $basedir =~ m{/\z};

    # Only records queued strictly inside this window (epoch seconds) are kept.
    my $window_start = 1412517900;    # 2014-10-05 14:05:00 UTC
    my $window_end   = 1412674800;    # 2014-10-07 09:40:00 UTC

    # Columns needed to decide whether a record is kept at all.
    my @filter_columns = qw(orig type timeQueued);

    # Columns copied into the output; a column missing from the input is
    # emitted as an empty string.
    my @output_columns = qw(
        vmta orig rcpt dsnDiag dsnStatus
        header_X-EMarSys-Identify header_X-EMarSys-Environment
        header_message-id header_From
    );

    my $output_header = 'Host,"Virtual MTA",From,To,"Time Delivered",'
        . '"Time Bounced","DSN Diagnostics","DSN Status",Status,Header';

    my $script_dir = cwd();

    opendir(my $dh, $basedir) or die "Failed to open $basedir: $!";

    ENTRY:
    while (defined(my $entry = readdir $dh)) {
        next ENTRY unless $entry =~ /(?<pmta>e3uspmta.+)/;
        my $current_pmta = $+{pmta};
        say " Working on: $current_pmta";

        opendir(my $indh, $basedir . $current_pmta)
            or die "Failed to open $current_pmta: $!";

        FILE:
        while (defined(my $file = readdir $indh)) {
            next FILE unless $file =~ /acct-\Q$date\E/;

            # The output name is the input name minus ".bz2"; anything else
            # cannot be decompressed or named sensibly, so it is skipped.
            next FILE unless $file =~ /\A(?<stem>.+)\.bz2\z/;
            my $ff = $+{stem};

            my $path_to_work_in = "$script_dir/$date/$current_pmta";
            if (!-d $path_to_work_in) {
                make_path($path_to_work_in);
                -d $path_to_work_in
                    or die "can't create the path $path_to_work_in: $!";
            }

            open(my $of, '>', "$path_to_work_in/e3us-$current_pmta-$ff")
                or die "can't create a file $ff in $path_to_work_in: $!";

            my $bzbuffer;
            bunzip2($basedir . $current_pmta . "/" . $file => \$bzbuffer)
                or die "bunzip2 failed: $Bunzip2Error\n";

            # Column name => index, rebuilt for every file so that the layout
            # of a previous file can never leak into this one.
            my %column_of;
            my $line_no = 0;

            LINE:
            for my $line (split /\n/, $bzbuffer) {
                if ($line_no++ == 0) {
                    my @names = Text::ParseWords::parse_line(',', 0, $line);
                    my $idx = 0;
                    for my $name (@names) {
                        $column_of{$name} = $idx if defined $name;
                        $idx++;
                    }
                    say {$of} $output_header;

                    my @missing = grep { !exists $column_of{$_} } @filter_columns;
                    if (@missing) {
                        warn "$current_pmta/$file: missing column(s) "
                            . join(', ', @missing) . ", skipping file\n";
                        last LINE;
                    }
                    next LINE;    # the header row is not a record
                }

                ##################################
                # Here goes the concrete parsing #
                ##################################
                my @ln = Text::ParseWords::parse_line(',', 0, $line);
                next LINE unless @ln;    # blank or unparsable line

                my $type = $ln[ $column_of{type} ] // '';
                my $orig = $ln[ $column_of{orig} ] // '';
                next LINE unless $type eq 'b';
                next LINE
                    unless $orig =~ /^e3us-[0-9]+-[A-Fa-f0-9]+[iI]{2}[A-Fa-f0-9]+/;

                # "Time Delivered" in the output is taken from timeQueued.
                my $time_deliv = str2time($ln[ $column_of{timeQueued} ] // '');
                next LINE
                    unless defined $time_deliv
                    && $time_deliv > $window_start
                    && $time_deliv < $window_end;

                my $logged_idx   = $column_of{timeLogged};
                my $time_bounced =
                    defined $logged_idx && defined $ln[$logged_idx]
                    ? str2time($ln[$logged_idx])
                    : undef;
                $time_bounced //= '';

                my %value_of = map {
                    my $idx = $column_of{$_};
                    ($_ => (defined $idx && defined $ln[$idx]) ? $ln[$idx] : '')
                } @output_columns;

                my $headers = sprintf
                    'X-EMarSys-Identify: %s, X-EMarSys-Environment: %s, message-id: %s, From: %s',
                    @value_of{qw(
                        header_X-EMarSys-Identify header_X-EMarSys-Environment
                        header_message-id header_From
                    )};

                say {$of} join ',',
                    $current_pmta,
                    @value_of{qw(vmta orig rcpt)},
                    $time_deliv,
                    $time_bounced,
                    qq{"$value_of{dsnDiag}"},
                    qq{"$value_of{dsnStatus}"},
                    3,
                    qq{"$headers"};
            }

            # Buffered write errors only surface at close time.
            close $of
                or die "can't close e3us-$current_pmta-$ff in $path_to_work_in: $!";
        }
        closedir $indh;
    }
    closedir $dh;
    return;
}
# Driver: parse every date in its own child process, then wait for all of
# them and report a non-zero exit status if any child failed.

# Flush STDOUT immediately so that output buffered in the parent is not
# inherited (and printed a second time) by children forked afterwards.
STDOUT->autoflush(1);

my @children;
my $failures = 0;
my $dates_to_parse = ["2014-10-05","2014-10-06","2014-10-07"];
foreach my $d (@$dates_to_parse) {
    my $pid = fork();

    # fork() returns undef on failure; without this check the parent would
    # fall into the child branch, run main() itself and exit.
    if (!defined $pid) {
        warn "Failed to fork a child for $d: $!\n";
        $failures++;
        next;
    }

    if ($pid) {
        # Parent: remember the child so it can be reaped below.
        say "Created a child with pid: $pid";
        push @children, $pid;
        next;
    }

    # Child: do the work for this date and leave.
    say "Parsing for $d";
    main(\$d);
    exit 0;
}

foreach my $child (@children) {
    my $pp = waitpid($child, 0);
    say "Done with a child: $pp";

    # $? holds the wait status of the child that was just reaped.
    if ($pp == $child && $? != 0) {
        warn "Child $child failed with wait status $?\n";
        $failures++;
    }
}

exit 1 if $failures;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment