Skip to content

Instantly share code, notes, and snippets.

@olksdr
Last active November 22, 2024 09:56
Show Gist options
  • Select an option

  • Save olksdr/58a32d735b9bcaf86745 to your computer and use it in GitHub Desktop.

Select an option

Save olksdr/58a32d735b9bcaf86745 to your computer and use it in GitHub Desktop.
A simple ad-hoc script that parses PMTA accounting files to recover bounces.
#!/usr/bin/env perl
use feature qw(say);
use strict;
use warnings;
use utf8;
use 5.16.0;
use IO::Handle;
use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error);
use POSIX qw(strftime mktime);
use Date::Parse;
use Text::ParseWords;
use File::Copy;
use File::Path qw(make_path);
use Cwd;
# Recover bounce records for one day from the archived accounting files.
#
# Walks every directory under the archive root whose name matches
# /e3uspmta.+/, picks the files whose name contains "acct-<date>",
# decompresses each one and writes the matching bounce records as CSV to
# <cwd>/<date>/<pmta>/e3us-<pmta>-<file name without .bz2>.
#
# Parameters:
#   $date_to_parse - reference to the date string (e.g. \"2014-10-05"); a
#                    plain string is accepted as well.
# Returns: nothing.
# Dies when a directory cannot be read, the output directory or file cannot
# be created or flushed, or decompression fails.
sub main {
    my $date_to_parse = shift;

    my $date = ref $date_to_parse ? $$date_to_parse : $date_to_parse;
    die "main() needs a date to look for\n"
        unless defined $date && length $date;

    my $basedir    = '/mnt/data/archive/upstream/acct/';
    my $script_dir = cwd();

    opendir(my $dh, $basedir) or die "Failed to open $basedir: $!";

    # A lexical loop variable keeps the scan from overwriting the global $_.
    while (my $entry = readdir $dh) {
        next unless $entry =~ /(?<pmta>e3uspmta.+)/;
        my $current_pmta = $+{pmta};
        say " Working on: $current_pmta";

        opendir(my $indh, $basedir . $current_pmta)
            or die "Failed to open $current_pmta: $!";
        while (my $file = readdir $indh) {
            # \Q..\E: the date is matched literally, never as a pattern.
            next unless $file =~ m/acct-\Q$date\E/;

            # Output name is the input name minus a trailing ".bz2"; a name
            # without that suffix is used unchanged instead of ending up
            # undefined (or carrying over the previous file's name).
            (my $ff = $file) =~ s/\.bz2\z//;

            my $path_to_work_in = "$script_dir/$date/$current_pmta";
            if (!-d $path_to_work_in) {
                make_path($path_to_work_in);
                # Judge by the end result: make_path reports 0 new
                # directories when someone else created the path meanwhile.
                -d $path_to_work_in
                    or die "can't create the path $path_to_work_in: $!";
            }

            open(my $of, '>', "$path_to_work_in/e3us-$current_pmta-$ff")
                or die "can't create a file $ff in $path_to_work_in: $!";

            my $bzbuffer;
            bunzip2($basedir . $current_pmta . "/" . $file => \$bzbuffer)
                or die "bunzip2 failed: $Bunzip2Error\n";

            _write_bounces($of, \$bzbuffer, $current_pmta, $file);

            # Buffered write errors (e.g. a full disk) only surface here.
            close $of
                or die "can't finish writing e3us-$current_pmta-$ff in $path_to_work_in: $!";
        }
        closedir $indh;
    }
    closedir $dh;
    return;
}

# Write the CSV header plus every qualifying bounce record of one
# decompressed accounting file to an already opened output handle.
#
# Parameters:
#   $of           - output file handle
#   $buffer_ref   - reference to the decompressed file content
#   $current_pmta - value written into the "Host" column
#   $source_name  - input file name, used in diagnostics only
#
# A record qualifies when its type is 'b', its originator matches the e3us
# pattern below and its timeQueued value lies strictly inside the window.
# Nothing is written for an empty file; a file whose header lacks a required
# column gets the CSV header only and a warning.
sub _write_bounces {
    my ($of, $buffer_ref, $current_pmta, $source_name) = @_;

    # Exclusive epoch bounds for timeQueued:
    # 2014-10-05 14:05:00 UTC .. 2014-10-07 09:40:00 UTC.
    # NOTE(review): presumably the window of the incident being recovered;
    # confirm before reusing the script for other dates.
    my $window_start = 1412517900;
    my $window_end   = 1412674800;

    my @required = qw(type orig rcpt vmta timeQueued timeLogged dsnDiag dsnStatus);
    my @optional = (
        'header_X-EMarSys-Identify', 'header_X-EMarSys-Environment',
        'header_message-id',         'header_From',
    );

    my ($header_line, @records) = split /\n/, $$buffer_ref;
    return unless defined $header_line;

    # The column map is rebuilt for every file so that indices from a
    # previously processed file can never leak into this one.
    my %column_of;
    my $position = 0;
    for my $name (Text::ParseWords::parse_line(',', 0, $header_line)) {
        $column_of{$name} = $position if defined $name;
        $position++;
    }

    say {$of} "Host,\"Virtual MTA\",From,To,\"Time Delivered\",\"Time Bounced\",\"DSN Diagnostics\",\"DSN Status\",Status,Header";

    my @missing = grep { !exists $column_of{$_} } @required;
    if (@missing) {
        warn "Skipping $source_name: header lacks column(s) @missing\n";
        return;
    }

    # Resolve the column positions once; optional columns may be absent.
    my @wanted  = (@required, @optional);
    my @indices = map { $column_of{$_} } @wanted;

    for my $line (@records) {
        my @ln = Text::ParseWords::parse_line(',', 0, $line);
        next unless @ln;    # blank line or unbalanced quotes

        # Absent columns and short rows read as empty strings instead of
        # falling back to column 0 or raising warnings.
        my %rec;
        @rec{@wanted} = map { defined $_ && defined $ln[$_] ? $ln[$_] : '' } @indices;

        next unless $rec{type} eq 'b';
        next unless $rec{orig} =~ /^e3us-[0-9]+-[A-Fa-f0-9]+[iI]{2}[A-Fa-f0-9]+/;

        # The "Time Delivered" output column carries the timeQueued value.
        my $time_deliv = str2time($rec{timeQueued});
        next unless defined $time_deliv;
        next unless $time_deliv > $window_start && $time_deliv < $window_end;

        my $time_bounced = str2time($rec{timeLogged}) // '';

        my $headers = join ', ',
            "X-EMarSys-Identify: $rec{'header_X-EMarSys-Identify'}",
            "X-EMarSys-Environment: $rec{'header_X-EMarSys-Environment'}",
            "message-id: $rec{'header_message-id'}",
            "From: $rec{header_From}";

        say {$of} join ',',
            $current_pmta, @rec{qw(vmta orig rcpt)},
            $time_deliv, $time_bounced,
            qq{"$rec{dsnDiag}"}, qq{"$rec{dsnStatus}"},
            3, qq{"$headers"};
    }
    return;
}
# Driver: parse each date in its own child process, then wait for all of them.

# Flush progress lines immediately. Output still sitting in the buffer at
# fork() time would be copied into every child and printed again.
STDOUT->autoflush(1);

my @children;
my $failures = 0;
my $dates_to_parse = ["2014-10-05","2014-10-06","2014-10-07"];
foreach my $d (@$dates_to_parse) {
    my $pid = fork();

    # fork() returns undef on failure. Comparing that with == 0 is true, so
    # without this guard the parent would run main() itself and exit without
    # waiting for the children it already started.
    if (!defined $pid) {
        warn "Could not fork a child for $d: $!\n";
        $failures++;
        next;
    }

    if ($pid) {
        say "Created a child with pid: $pid";
        push @children, $pid;
    }
    else {
        say "Parsing for $d";
        main(\$d);
        exit 0;
    }
}

foreach my $child (@children) {
    my $pp = waitpid($child, 0);
    say "Done with a child: $pp";

    # $? holds the wait status of the child that was just reaped.
    if ($pp > 0 && $? != 0) {
        warn "Child $pp did not finish cleanly (wait status $?)\n";
        $failures++;
    }
}

# Let callers see that at least one date was not processed completely.
exit 1 if $failures;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment