#!/usr/bin/env perl use strict; use warnings; =head1 NAME gpcq - Generate a Purge Check Query =head1 SYNOPSIS C find.sql> =head1 DESCRIPTION Run this script on a dump of the function definition of the actor.usr_purge_data function from your Evergreen installation to generate a query to find deleted users with unpurged data. You may then use the output of this query to purge thosse users' data. You may dump the function definition with the following command line, adding appropriate connection parameters for your system: C After that, you may generate the query with a command like in the synopsis, or you can pipe the output directly into psql like so: C Again, you will need to add any connection parameters as appropriate to psql. The use of the C<--csv> option and the output filename are suggestions. You can, of course, get the output in any format you prefer. The generated query will dump the id, usrname, and deletion date of the user from the actor.usr table as well as an array of the table names where the unpurged data appears for each patron that has been deleted but has has some unpurged data remaining. The query is generated from the update and delete statements in the actor.usr_purge_data function definition. What this function does can vary by Evergreen release, so this is why you must dump the function defintion to a file in order for gpcq to parse it. gpcq takes no options other than the filename of the function definition. =head1 AUTHOR Jason Stephenson =head1 COPYRIGHT AND LICENSE Copyright 2025 C/W MARS, Inc. gpcq is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. gpcq is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with gpcq. If not, see . =cut my %data = (); if (open my $fh, "<", $ARGV[0]) { my ($table, $field); while (my $line = <$fh>) { if ($line =~ /DELETE\s+FROM\s+([^[:space:]]+)\s+WHERE\s([^[:space:]]+)\s+=\s+src_usr/) { $table = $1; $field = $2; } elsif ($line =~ /UPDATE\s+([^[:space:]]+)/) { $table = $1; } if ($line =~ /SET\s+([^[:space:]]+)\s+=\s+dest_usr/) { $field = $1; } if ($field) { add_to_data($table, $field); } undef($field); } close($fh); } print("SELECT id, usrname, delete_date, array_agg(table_name) as tables\n"); print("FROM (\n"); my $times_used = 0; foreach my $k (sort keys %data) { if ($times_used) { print("UNION\n"); } print("SELECT DISTINCT usr${times_used}.id, usr${times_used}.usrname, "); print("usr${times_used}.create_date as delete_date, '$k' as table_name\n"); print("FROM actor.usr usr${times_used}\n"); print("JOIN $k ON "); my $table = $1 if ($k =~ /^.*\.(.*)$/); my @fields = @{$data{$k}}; if (@fields > 1) { my $f = 0; print("("); foreach my $field (@fields) { if ($f++) { print(" OR "); } print("${table}.${field} = usr${times_used}.id"); } print(")\n"); } else { print("${table}.${fields[0]} = usr${times_used}.id\n"); } print("WHERE usr${times_used}.deleted\n"); $times_used++; } print(")\nGROUP BY id, usrname, delete_date\n"); print("ORDER BY delete_date, id\n"); sub add_to_data { my $table = shift; my $field = shift; unless (defined $data{$table}) { $data{$table} = (); } push @{$data{$table}}, $field; }