Skip to content

Instantly share code, notes, and snippets.

@vdinovi
Last active September 15, 2022 22:03
Show Gist options
  • Select an option

  • Save vdinovi/a75f7145d89468f738a39a64572ca2d2 to your computer and use it in GitHub Desktop.

Select an option

Save vdinovi/a75f7145d89468f738a39a64572ca2d2 to your computer and use it in GitHub Desktop.
#!/usr/bin/ruby
require 'set'
require 'csv'
# Command-line contract: two "<key>:<filename>" specs followed by one or more
# "<col>:<col>" pairs naming the columns to compare (column in the first
# file, then column in the second file).
usage = "Usage: compare_csv <key>:<filename> <key>:<filename> <col:col> ..."
raise usage unless ARGV.size > 2

# Split on the FIRST colon only, so a filename that itself contains colons
# (e.g. "id:C:/data/a.csv") is kept intact instead of being truncated.
k1, f1 = ARGV[0].split(':', 2)
k2, f2 = ARGV[1].split(':', 2)

# A spec without a colon (or with nothing after it) has no filename; report
# the usage text rather than failing later inside File.expand_path.
raise usage if [f1, f2].any? { |f| f.nil? || f.empty? }

cols = ARGV[2..].map { |c| c.split(':') }

# Load each file into a Hash of { key-column value => row as a Hash }.
# Rows whose key column is missing or blank-parsed-as-nil are skipped, and a
# later row with the same key value replaces an earlier one.
rows1, rows2 = [[k1, f1], [k2, f2]].map do |(key, filename)|
  CSV.foreach(File.expand_path(filename), headers: true).each_with_object({}) do |row, rows|
    id = row[key]
    rows[id] = row.to_h if id
  end
end
# Canonicalizes a raw CSV cell so that equivalent spellings compare equal.
#
# @param value [String, nil] the raw cell value
# @return [nil] for nil or the empty string
# @return [false] for "0" or the word "false" (any case, optional surrounding whitespace)
# @return [true] for "1" or the word "true" (any case, optional surrounding whitespace)
# @return [String] the value unchanged for any other string
# @raise [NotImplementedError] if value is neither a String nor nil
def normalize(value)
  case value
  when nil
    nil
  when String
    case value
    when '' then nil
    # The patterns are anchored to the whole string: an unanchored match would
    # turn values that merely contain the word (e.g. "falsetto", "untrue")
    # into booleans and corrupt the comparison.
    when '0', /\A\s*false\s*\z/i then false
    when '1', /\A\s*true\s*\z/i then true
    else value
    end
  else
    raise NotImplementedError, "normalize does not support #{value.class}"
  end
end
# Compare every key that appears in both files, in the order the keys occur
# in the first file. Keys found in only one file are skipped without output.
(rows1.keys & rows2.keys).each do |key|
  left_row = rows1[key]
  right_row = rows2[key]

  # Collect, per requested column pair, the normalized values that disagree:
  # { [left_col, right_col] => [left_value, right_value] }.
  mismatches = cols.each_with_object({}) do |(left_col, right_col), found|
    left = normalize(left_row[left_col])
    right = normalize(right_row[right_col])
    found[[left_col, right_col]] = [left, right] unless left == right
  end

  if mismatches.empty?
    puts "Rows for key #{key} matched"
  else
    puts "Error: rows for key #{key} differ (#{mismatches.inspect})"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment