# As it turns out, the CSV produced by NHTSA is broken.
# So instead we fetch the JSON document for every candidate vehicle id
# (assuming no car has an id above 10000), use jq to collect the union of all
# keys (which vary widely between records), and then map every record onto
# that key list to build a pipe-separated file, all-vehicles.psv.
#
# Requires: curl, jq, csvfix.

# Fail early if a tool is missing. This matters most for jq: the cleanup step
# below deletes every file jq cannot parse, so a missing jq would otherwise
# wipe all downloads.
for tool in curl jq csvfix; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'error: required tool not found: %s\n' "$tool" >&2
    exit 1
  fi
done

mkdir -p json/vehicles || exit 1

# Download one JSON document per candidate id. The file name is printed as a
# progress indicator; a failed download is reported but does not stop the run.
id=1
while [ "$id" -le 10000 ]; do
  printf '%s\n' "$id.json"
  curl -sS "http://www.nhtsa.gov/webapi/api/SafetyRatings/VehicleId/$id?format=json" \
    -o "json/vehicles/$id.json" \
    || printf 'warning: download failed for vehicle id %s\n' "$id" >&2
  id=$((id + 1))
done

# Remove bad JSON: any file jq cannot parse (or that yields no value at all)
# would abort the concatenated jq runs below, so delete it here.
# NOTE(review): the original filter was `grep -l ''`, which matches every
# non-empty file and therefore removed all downloads; its real pattern appears
# to have been lost. Validating with jq is the replacement.
find ./json/vehicles -name '*.json' -exec sh -c '
  for f in "$@"; do
    jq -e . "$f" >/dev/null 2>&1 || rm -f -- "$f"
  done
' sh {} +

# The raw JSON text contains the literal escape sequences \r\n and \u00A0
# (backslash included). They are inexplicably displayed as literal characters,
# until they aren't, so strip them from the text before jq decodes it.
strip_escapes() {
  sed -e 's/\\r\\n//g' -e 's/\\u00A0//g'
}

# Get the keys: the sorted, de-duplicated union of the keys of every record
# with Count == 1, each prefixed with '.' and joined with ',' so the result
# can be spliced into a jq filter as `map(.A,.B,...)`.
# The input is sanitised exactly like the row pipeline below so that both
# pipelines see the same key names.
allkeys=$(
  find ./json/vehicles -name '*.json' -exec cat {} + \
    | strip_escapes \
    | jq --sort-keys -r 'select(.Count == 1) | .Results[0] | keys | @csv' \
    | grep -oE '[[:alnum:]]+' \
    | sort -u \
    | sed -e 's/^/./' \
    | paste -s -d ',' -
)

# An empty key list would expand to `map()`, which is a jq syntax error;
# stop here instead of truncating the output file and then failing.
if [ -z "$allkeys" ]; then
  printf 'error: no usable vehicle records found under json/vehicles\n' >&2
  exit 1
fi

# Header row: the key names without their leading dots, passed through
# csvfix with '|' as the output separator.
printf '%s\n' "$allkeys" \
  | tr -d '.' \
  | csvfix echo -osep '|' -smq > all-vehicles.psv

# Data rows: one row per record with Count == 1, values in header order.
# $allkeys is interpolated into the jq program on purpose; it only contains
# alphanumerics, dots and commas (see the grep/sed stages above).
find ./json/vehicles -name '*.json' -exec cat {} + \
  | strip_escapes \
  | jq --sort-keys -r "select(.Count == 1) | .Results | map($allkeys) | @csv" \
  | csvfix echo -osep '|' -smq >> all-vehicles.psv