Skip to content

Instantly share code, notes, and snippets.

@ivanku
Created August 5, 2016 06:05
Show Gist options
  • Select an option

  • Save ivanku/00d2520ba6d92daf97e50d9ebc6eb4cd to your computer and use it in GitHub Desktop.

Select an option

Save ivanku/00d2520ba6d92daf97e50d9ebc6eb4cd to your computer and use it in GitHub Desktop.

Revisions

  1. ivanku created this gist Aug 5, 2016.
    104 changes: 104 additions & 0 deletions life_expectancy.jl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,104 @@
    using Requests
    using LibExpat
    using DataFrames
    using Plotly
    using Colors

    # Download the Wikipedia population page and parse the response body into an
    # element tree; `rows` holds the elements matched by the XPath `//table[1]//tr`.
    res = get("https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)");
    et = xp_parse(bytestring(res.data));
    rows = LibExpat.find(et, "//table[1]//tr");

    # Scraped values are collected as raw strings, one entry per accepted row.
    countries = UTF8String[];
    regions = UTF8String[];
    population = UTF8String[];

    for row in rows
        cells = LibExpat.find(row, "td");
        # The body reads cells 2, 3 and 6, so at least six <td> cells are required.
        # The previous `> 1` check let shorter rows through and then failed with a
        # BoundsError on `cells[6]`. The second condition keeps only rows whose
        # second cell has more than one child node.
        # NOTE(review): the nested `.elements[...]` positions assume the page
        # layout at the time of writing -- confirm against the live page.
        if length(cells) >= 6 && length(cells[2].elements) > 1
            push!(countries, strip(cells[2].elements[2].elements[1]));
            push!(regions, strip(cells[3].elements[1].elements[1]));
            push!(population, strip(cells[6].elements[1]));
        end
    end

    # Assemble the scraped columns into a table. From here on `population` names
    # the DataFrame rather than the raw string vector.
    population = DataFrame(
        country = countries,
        region = regions,
        population = population);

    # Replace long-form country names with shorter ones, applied in listed order.
    # Presumably this makes them match the names in the life-expectancy table
    # that is joined on :country further down -- verify against both pages.
    for (long_name, short_name) in [
            ("United States of America", "United States"),
            ("Federated States of Micronesia", "Micronesia"),
            ("Republic of Macedonia", "Macedonia")]
        population[population[:country] .== long_name, :country] = short_name;
    end

    # Download the Wikipedia life-expectancy page and parse the response body into
    # an element tree; `rows` holds the elements matched by `//table[1]//tr`.
    res = get("https://en.wikipedia.org/wiki/List_of_countries_by_life_expectancy");
    et = xp_parse(bytestring(res.data));
    rows = LibExpat.find(et, "//table[1]//tr");

    # Scraped values are collected as raw strings, one entry per accepted row.
    countries_expectancy = UTF8String[];
    female_life_expectancy = UTF8String[];
    male_life_expectancy = UTF8String[];

    for row in rows
        cells = LibExpat.find(row, "td");
        # The body reads cells 1, 5 and 7, so at least seven <td> cells are required.
        # The previous `> 1` check let shorter rows through and then failed with a
        # BoundsError on `cells[7]`. The second condition keeps only rows whose
        # first cell has more than one child node.
        # NOTE(review): which column holds the female/male figures depends on the
        # page layout at the time of writing -- confirm against the live page.
        if length(cells) >= 7 && length(cells[1].elements) > 1
            push!(countries_expectancy, strip(cells[1].elements[2].elements[1]));
            push!(female_life_expectancy, strip(cells[5].elements[1]));
            push!(male_life_expectancy, strip(cells[7].elements[1]));
        end
    end

    # Assemble the scraped life-expectancy columns into a table.
    expectancy = DataFrame(
        country = countries_expectancy,
        female_life_expectancy = female_life_expectancy,
        male_life_expectancy = male_life_expectancy);

    # Replace formal country names with shorter ones, applied in listed order.
    # Presumably this makes them match the names in the population table that is
    # joined on :country afterwards -- verify against both pages.
    for (formal_name, short_name) in [
            ("Democratic People's Republic of Korea", "North Korea"),
            ("Republic of Korea", "South Korea"),
            ("Brunei Darussalam", "Brunei"),
            ("Lao People's Democratic Republic", "Laos"),
            ("Republic of Moldova", "Moldova"),
            ("Russian Federation", "Russia"),
            ("United Republic of Tanzania", "Tanzania"),
            ("Viet Nam", "Vietnam")]
        expectancy[expectancy[:country] .== formal_name, :country] = short_name;
    end

    # Join the population and life-expectancy tables on the country name; the
    # combined table replaces `expectancy`.
    expectancy = join(population, expectancy, on = :country);

    # The scraped values are still text: convert both life-expectancy columns to
    # Float32, then the population column to Int64 after removing the commas.
    for column in (:female_life_expectancy, :male_life_expectancy)
        expectancy[column] = map(text -> parse(Float32, text), expectancy[column]);
    end
    expectancy[:population] = map(expectancy[:population]) do text
        parse(Int64, replace(text, ",", "", 0))
    end;

    # Figure layout: legend shown, hover mode "closest", and titled axes with
    # male life expectancy on x and female life expectancy on y.
    layout = Layout(showlegend = true,
        hovermode = "closest",
        xaxis = Dict(:title => "Male Life Expectancy [years]"),
        yaxis = Dict(:title => "Female Life Expectancy [years]"));

    # Marker size per country: population scaled so the largest value maps to 10000.
    # The maximum is computed once here rather than once per row inside the
    # closure, which made the original quadratic in the number of rows.
    expectancy[:sizes] = let largest_population = maximum(expectancy[:population])
        map(x -> 10000 * x / largest_population, expectancy[:population])
    end;

    # Trace list for the figure. It starts with a thin, semi-transparent line from
    # (45, 45) to (90, 90), i.e. the x = y diagonal, which is kept out of the legend.
    # Plotly's `line.dash` takes "dash" for a dashed stroke; the previous value
    # "dashed" is not one of the accepted names, so the line was not drawn dashed.
    data = [
        scatter(
            x = [45, 90],
            y = [45, 90],
            mode = "lines",
            showlegend = false,
            line = Dict(:color => "rgba(60, 60, 60, 0.5)", :dash => "dash", :width => 0.5))
    ];

    # Map each distinct region to one color: the regions (in order of first
    # appearance) are paired with an equally long palette from `diverging_palette`.
    region_colors = let region_names = unique(expectancy[:region])
        palette = diverging_palette(100, 200, length(region_names), c = 0.4)
        Dict(zip(region_names, palette))
    end;

    # Append one marker trace per region: male life expectancy on x, female on y,
    # markers colored by region and sized (by area) from the :sizes column.
    for region_name in unique(expectancy[:region])
        members = expectancy[expectancy[:region] .== region_name, :]

        # Hover text for each country: its name followed by its population.
        hover_labels = map(members[:country], members[:population]) do country_name, headcount
            string(country_name, "<br>population (size): ", headcount)
        end

        bubble_style = Dict(
            :color => region_colors[region_name],
            :size => members[:sizes],
            :sizemode => "area")

        push!(data,
            scatter(
                x = members[:male_life_expectancy],
                y = members[:female_life_expectancy],
                mode = "markers",
                name = region_name,
                marker = bubble_style,
                text = hover_labels,
                hoverinfo = "text"));
    end

    # Build the figure from all collected traces and the layout.
    my_plot = plot(data, layout)