Skip to content

Instantly share code, notes, and snippets.

@dannguyen
Last active June 13, 2022 20:04
Show Gist options
  • Select an option

  • Save dannguyen/57423dbcb1713d31b659 to your computer and use it in GitHub Desktop.

Select an option

Save dannguyen/57423dbcb1713d31b659 to your computer and use it in GitHub Desktop.

Revisions

  1. dannguyen revised this gist Feb 9, 2015. 1 changed file with 44 additions and 0 deletions.
    44 changes: 44 additions & 0 deletions sample.json
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,44 @@
    {
    "ComplaintsCount": 185,
    "FrontCrashDriversideNotes": null,
    "FrontCrashDriversideRating": "3",
    "FrontCrashDriversideSafetyConcern": null,
    "FrontCrashPassengersideNotes": null,
    "FrontCrashPassengersideRating": "2",
    "FrontCrashPicture": "http://www.safercar.gov/staticfiles/DOT/safercar/ncapmedia/images/2011/v07127P062.jpg",
    "FrontCrashVideo": "http://www.safercar.gov/staticfiles/DOT/safercar/ncapmedia/movies/2011/v07127C017.wmv",
    "FrontPassengersideSafetyConcern": null,
    "InvestigationCount": 1,
    "Make": "RAM",
    "Model": "1500 QUAD",
    "ModelYear": 2011,
    "NHTSAElectronicStabilityControl": "Standard",
    "NHTSAForwardCollisionWarning": "No",
    "NHTSALaneDepartureWarning": "No",
    "NHTSARearviewVideoSystems": null,
    "OverallFrontCrashRating": "2",
    "OverallRating": "3",
    "OverallSideCrashRating": "5",
    "RecallsCount": 2,
    "RolloverNotes": null,
    "RolloverPossibility": 0.198,
    "RolloverPossibility2": 0,
    "RolloverRating": "4",
    "RolloverRating2": "Not Rated",
    "SideCrashDriversideNotes": null,
    "SideCrashDriversideRating": "5",
    "SideCrashDriversideSafetyConcern": null,
    "SideCrashPassengersideNotes": null,
    "SideCrashPassengersideRating": "5",
    "SideCrashPassengersideSafetyConcern": null,
    "SideCrashPicture": "http://www.safercar.gov/staticfiles/DOT/safercar/ncapmedia/images/2011/v07129P078.jpg",
    "SideCrashVideo": "http://www.safercar.gov/staticfiles/DOT/safercar/ncapmedia/movies/2011/v07129C013.wmv",
    "SidePoleCrashRating": "1",
    "SidePoleNotes": "Although not included in the star rating, the driver dummy's abdomen rib deflection and thoracic rib deflection readings were elevated",
    "SidePolePicture": "http://www.safercar.gov/staticfiles/DOT/safercar/ncapmedia/images/2011/v07065P010.jpg",
    "SidePoleSafetyConcern": "Due to the intrusion of the driver door during the side impact pole test, the interior door panel struck the torso of the driver dummy, causing high resultant lower spine acceleration of 87 g's. High resultant lower spine accelerations, in excess of 82 g's, have a higher likelihood of thoracic injury.",
    "SidePoleVideo": "http://www.safercar.gov/staticfiles/DOT/safercar/ncapmedia/movies/2011/v07065C012.wmv",
    "VehicleDescription": "2011 Ram 1500 Quad PU/CC RWD",
    "VehicleId": 109,
    "VehiclePicture": "http://www.safercar.gov/staticfiles/DOT/safercar/ncapmedia/images/2011/v07127P005.jpg"
    }
  2. dannguyen revised this gist Feb 9, 2015. 1 changed file with 5 additions and 1 deletion.
    6 changes: 5 additions & 1 deletion nhtsa-5-star-api-scrape-json.bash
    Original file line number Diff line number Diff line change
    @@ -20,4 +20,8 @@ find ./json/vehicles -name "*.json" | xargs grep -l '</html>' | xargs rm
    allkeys=$(find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | sed 's/\\r\\n//g' | sed 's/\\u00A0//g'| paste -s -d ',' -)
    echo $allkeys | tr -d '.' | csvfix echo -osep '|' -smq > all-vehicles.psv

    find ./json/vehicles -name "*.json" | xargs cat | sed 's/\\r\\n//g' | sed 's/\\u00A0//g' | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" | csvfix echo -osep '|' -smq >> all-vehicles.psv
    find ./json/vehicles -name "*.json" | xargs cat | sed 's/\\r\\n//g' | sed 's/\\u00A0//g' | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" | csvfix echo -osep '|' -smq >> all-vehicles.psv

    # Also build a single JSON file (all-vehicles.json), for good times' sake
    # TODO: refactor later
    find ./json/vehicles -name "*.json" | xargs cat | sed 's/\\r\\n//g' | sed 's/\\u00A0//g' | jq --sort-keys -r "select(.Count == 1) .Results[0] | {$(echo $allkeys | tr -d '.')}" | jq --slurp '.' > all-vehicles.json
  3. dannguyen revised this gist Feb 9, 2015. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions nhtsa-5-star-api-scrape-json.bash
    Original file line number Diff line number Diff line change
    @@ -12,6 +12,9 @@ done
    # remove bad json
    find ./json/vehicles -name "*.json" | xargs grep -l '</html>' | xargs rm

    # The pipelines below use this expression: sed 's/\\r\\n//g' | sed 's/\\u00A0//g'
    # because the \r\n and \u00A0 escape sequences inexplicably show up as literal text (though not consistently).


    # get the keys
    allkeys=$(find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | sed 's/\\r\\n//g' | sed 's/\\u00A0//g'| paste -s -d ',' -)
  4. dannguyen revised this gist Feb 9, 2015. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions nhtsa-5-star-api-scrape-json.bash
    Original file line number Diff line number Diff line change
    @@ -14,7 +14,7 @@ find ./json/vehicles -name "*.json" | xargs grep -l '</html>' | xargs rm


    # get the keys
    allkeys=$(find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | paste -s -d ',' -)
    allkeys=$(find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | sed 's/\\r\\n//g' | sed 's/\\u00A0//g'| paste -s -d ',' -)
    echo $allkeys | tr -d '.' | csvfix echo -osep '|' -smq > all-vehicles.psv

    find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" | csvfix echo -osep '|' -smq >> all-vehicles.psv
    find ./json/vehicles -name "*.json" | xargs cat | sed 's/\\r\\n//g' | sed 's/\\u00A0//g' | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" | csvfix echo -osep '|' -smq >> all-vehicles.psv
  5. dannguyen revised this gist Feb 9, 2015. 1 changed file with 3 additions and 2 deletions.
    5 changes: 3 additions & 2 deletions nhtsa-5-star-api-scrape-json.bash
    Original file line number Diff line number Diff line change
    @@ -12,8 +12,9 @@ done
    # remove bad json
    find ./json/vehicles -name "*.json" | xargs grep -l '</html>' | xargs rm


    # get the keys
    allkeys=$(find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | paste -s -d ',' -)
    echo $allkeys | tr -d '.' > all-vehicles.csv
    echo $allkeys | tr -d '.' | csvfix echo -osep '|' -smq > all-vehicles.psv

    find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" | csvfix echo >> all-vehicles.csv
    find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" | csvfix echo -osep '|' -smq >> all-vehicles.psv
  6. dannguyen revised this gist Feb 9, 2015. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion nhtsa-5-star-api-scrape-json.bash
    Original file line number Diff line number Diff line change
    @@ -9,9 +9,11 @@ for id in $(seq 1 10000); do
    curl -sS "http://www.nhtsa.gov/webapi/api/SafetyRatings/VehicleId/$id?format=json" -o "json/vehicles/$id.json"
    done

    # remove bad json
    find ./json/vehicles -name "*.json" | xargs grep -l '</html>' | xargs rm

    # get the keys
    allkeys=$(find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | paste -s -d ',' -)
    echo $allkeys | tr -d '.' > all-vehicles.csv

    find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" >> all-vehicles.csv
    find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" | csvfix echo >> all-vehicles.csv
  7. dannguyen revised this gist Feb 9, 2015. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions nhtsa-5-star-api-scrape-json.bash
    Original file line number Diff line number Diff line change
    @@ -11,7 +11,7 @@ done


    # get the keys
    allkeys=$(cat json/vehicles/*.json | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | paste -s -d ',' -)

    allkeys=$(find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | paste -s -d ',' -)
    echo $allkeys | tr -d '.' > all-vehicles.csv
    cat json/vehicles/*.json | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" >> all-vehicles.csv

    find ./json/vehicles -name "*.json" | xargs cat | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" >> all-vehicles.csv
  8. dannguyen revised this gist Feb 9, 2015. 2 changed files with 20 additions and 0 deletions.
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,6 @@
    ## Note: this script is deprecated. jq is still awesome, so now we just fetch JSON all the way through (see nhtsa-5-star-api-scrape-json.bash)


    # jq JSON parser is awesome:
    # http://stedolan.github.io/jq/

    17 changes: 17 additions & 0 deletions nhtsa-5-star-api-scrape-json.bash
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,17 @@
    # As it turns out, the CSV produced by NHTSA is broken.
    # So now, let's just request the JSON for every possible vehicle ID (assuming no ID exceeds 10000),
    # then use jq to collect all possible keys (which vary widely)
    # and then map every result to that array of keys

    mkdir -p json/vehicles
    for id in $(seq 1 10000); do
    echo "$id.json"
    curl -sS "http://www.nhtsa.gov/webapi/api/SafetyRatings/VehicleId/$id?format=json" -o "json/vehicles/$id.json"
    done


    # get the keys
    allkeys=$(cat json/vehicles/*.json | jq --sort-keys -r 'select(.Count == 1) .Results[0] | keys | @csv' | grep -oE '[[:alnum:]]+' | sort | uniq | sed -E 's/^/./' | paste -s -d ',' -)

    echo $allkeys | tr -d '.' > all-vehicles.csv
    cat json/vehicles/*.json | jq --sort-keys -r "select(.Count == 1) .Results | map($allkeys) | @csv" >> all-vehicles.csv
  9. dannguyen revised this gist Feb 9, 2015. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions nhtsa-5-star-api-scrape.bash
    Original file line number Diff line number Diff line change
    @@ -31,6 +31,7 @@ while read year; do
    echo " $id: $year - $carmake - $model"
    curl -s "$BURL/VehicleId/$id?format=csv" -o "$id.csv"
    done
    echo ' '
    done
    done
    done
  10. dannguyen revised this gist Feb 9, 2015. 1 changed file with 6 additions and 2 deletions.
    8 changes: 6 additions & 2 deletions nhtsa-5-star-api-scrape.bash
    Original file line number Diff line number Diff line change
    @@ -6,18 +6,22 @@
    #
    # I query for JSON for most of the loop, and in the end, I get the Vehicle data in CSV format

    # Note: there are a lot of errors in the API, because the NHTSA doesn't properly escape the "/" in a car's name,
    # and there are many other whitespace-related errors.

    BURL='http://www.nhtsa.gov/webapi/api/SafetyRatings'

    # get all the years first
    curl -s "$BURL?format=json" | jq -r '.Results[] .ModelYear' | \
    while read year; do
    echo "$year"
    echo "######"
    curl -s "$BURL/modelyear/$year?format=json" | jq -r '.Results[] .Make' | sed 's/ /%20/g' | while read -r carmake; do
    curl -s "$BURL/modelyear/$year?format=json" | jq -r '.Results[] .Make' | sed 's/ /%20/g' | sed 's/&/_/g' | \
    while read -r carmake; do
    # Get the year and make
    echo " $carmake"
    echo " ======="
    curl -s "$BURL/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/g' | \
    curl -s "$BURL/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/g' | sed 's/&/_/g' | \
    while read -r model; do
    echo " $model"
    echo " -------"
  11. dannguyen revised this gist Feb 9, 2015. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions nhtsa-5-star-api-scrape.bash
    Original file line number Diff line number Diff line change
    @@ -13,11 +13,11 @@ curl -s "$BURL?format=json" | jq -r '.Results[] .ModelYear' | \
    while read year; do
    echo "$year"
    echo "######"
    curl -s "$BURL/modelyear/$year?format=json" | jq -r '.Results[] .Make' | sed 's/ /%20/' | while read -r carmake; do
    curl -s "$BURL/modelyear/$year?format=json" | jq -r '.Results[] .Make' | sed 's/ /%20/g' | while read -r carmake; do
    # Get the year and make
    echo " $carmake"
    echo " ======="
    curl -s "$BURL/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/' | \
    curl -s "$BURL/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/g' | \
    while read -r model; do
    echo " $model"
    echo " -------"
  12. dannguyen revised this gist Feb 9, 2015. 1 changed file with 10 additions and 11 deletions.
    21 changes: 10 additions & 11 deletions nhtsa-5-star-api-scrape.bash
    Original file line number Diff line number Diff line change
    @@ -7,25 +7,24 @@
    # I query for JSON for most of the loop, and in the end, I get the Vehicle data in CSV format

    BURL='http://www.nhtsa.gov/webapi/api/SafetyRatings'
    # get all the years first
    years=$(curl -s "$BURL?format=json" | jq -r '.Results[] .ModelYear')

    for year in $years; do
    # get all the years first
    curl -s "$BURL?format=json" | jq -r '.Results[] .ModelYear' | \
    while read year; do
    echo "$year"
    echo "######"
    curl -s "$BURL/modelyear/$year?format=json" | jq -r '.Results[] .Make' | sed 's/ /%20/' | while read -r carmake; do
    # Get the year and make
    echo " $carmake"
    echo " ======="

    curl -s "$BURL/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/' | while read -r model; do
    echo " $model"
    echo " -------"


    curl -s "$BURL/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/' | \
    while read -r model; do
    echo " $model"
    echo " -------"
    # Get the year, make, and model
    curl -s "$BURL/modelyear/$year/make/$carmake/model/$model?format=json" | jq -r '.Results[] .VehicleId' | while read -r id; do
    echo " $id: $year - $carmake - $model"
    curl -s "$BURL/modelyear/$year/make/$carmake/model/$model?format=json" | jq -r '.Results[] .VehicleId' | \
    while read -r id; do
    echo " $id: $year - $carmake - $model"
    curl -s "$BURL/VehicleId/$id?format=csv" -o "$id.csv"
    done
    done
  13. dannguyen revised this gist Feb 9, 2015. 1 changed file with 15 additions and 5 deletions.
    20 changes: 15 additions & 5 deletions nhtsa-5-star-api-scrape.bash
    Original file line number Diff line number Diff line change
    @@ -1,22 +1,32 @@
    years=$(curl -s 'http://www.nhtsa.gov/webapi/api/SafetyRatings?format=json' | jq -r '.Results[] .ModelYear')
    # jq JSON parser is awesome:
    # http://stedolan.github.io/jq/

    # The NHTSA API is pretty clunky, requiring you to get a list of all the years, then all the makes in that year, then all the models per make, and then
    # finally, you get the vehicle IDs needed to query the endpoint for one vehicle at a time.
    #
    # I query for JSON for most of the loop, and in the end, I get the Vehicle data in CSV format

    BURL='http://www.nhtsa.gov/webapi/api/SafetyRatings'
    # get all the years first
    years=$(curl -s "$BURL?format=json" | jq -r '.Results[] .ModelYear')

    for year in $years; do
    echo "$year"
    echo "######"
    curl -s "http://www.nhtsa.gov/webapi/api/SafetyRatings/modelyear/$year?format=json" | jq -r '.Results[] .Make' | sed 's/ /%20/' | while read -r carmake; do
    curl -s "$BURL/modelyear/$year?format=json" | jq -r '.Results[] .Make' | sed 's/ /%20/' | while read -r carmake; do
    # Get the year and make
    echo " $carmake"
    echo " ======="

    curl -s "http://www.nhtsa.gov/webapi/api/SafetyRatings/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/' | while read -r model; do
    curl -s "$BURL/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/' | while read -r model; do
    echo " $model"
    echo " -------"


    # Get the year, make, and model
    curl -s "http://www.nhtsa.gov/webapi/api/SafetyRatings/modelyear/$year/make/$carmake/model/$model?format=json" | jq -r '.Results[] .VehicleId' | while read -r id; do
    curl -s "$BURL/modelyear/$year/make/$carmake/model/$model?format=json" | jq -r '.Results[] .VehicleId' | while read -r id; do
    echo " $id: $year - $carmake - $model"
    curl -s "http://www.nhtsa.gov/webapi/api/SafetyRatings/VehicleId/$id?format=csv" -o "$id.csv"
    curl -s "$BURL/VehicleId/$id?format=csv" -o "$id.csv"
    done
    done
    done
  14. dannguyen created this gist Feb 9, 2015.
    23 changes: 23 additions & 0 deletions nhtsa-5-star-api-scrape.bash
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,23 @@
    years=$(curl -s 'http://www.nhtsa.gov/webapi/api/SafetyRatings?format=json' | jq -r '.Results[] .ModelYear')

    for year in $years; do
    echo "$year"
    echo "######"
    curl -s "http://www.nhtsa.gov/webapi/api/SafetyRatings/modelyear/$year?format=json" | jq -r '.Results[] .Make' | sed 's/ /%20/' | while read -r carmake; do
    # Get the year and make
    echo " $carmake"
    echo " ======="

    curl -s "http://www.nhtsa.gov/webapi/api/SafetyRatings/modelyear/$year/make/$carmake?format=json" | jq -r '.Results[] .Model' | sed 's/ /%20/' | while read -r model; do
    echo " $model"
    echo " -------"


    # Get the year, make, and model
    curl -s "http://www.nhtsa.gov/webapi/api/SafetyRatings/modelyear/$year/make/$carmake/model/$model?format=json" | jq -r '.Results[] .VehicleId' | while read -r id; do
    echo " $id: $year - $carmake - $model"
    curl -s "http://www.nhtsa.gov/webapi/api/SafetyRatings/VehicleId/$id?format=csv" -o "$id.csv"
    done
    done
    done
    done