Skip to content

Instantly share code, notes, and snippets.

@alan-ho
Last active January 4, 2018 01:27
Show Gist options
  • Select an option

  • Save alan-ho/f7a183c5cf4efbbe4ae80cce6105d525 to your computer and use it in GitHub Desktop.

Select an option

Save alan-ho/f7a183c5cf4efbbe4ae80cce6105d525 to your computer and use it in GitHub Desktop.
Creating Pandas DataFrames
#Create DataFrame from lists
import pandas as pd
import numpy as np
# Participating countries; the medal lists below are aligned with this list
# position by position.
countries = [
    'Russian Fed.', 'Norway', 'Canada', 'United States',
    'Netherlands', 'Germany', 'Switzerland', 'Belarus',
    'Austria', 'France', 'Poland', 'China', 'Korea',
    'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
    'Finland', 'Great Britain', 'Ukraine', 'Slovakia',
    'Italy', 'Latvia', 'Australia', 'Croatia', 'Kazakhstan',
]
gold = [13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3,
        2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
silver = [11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3,
          7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0]
bronze = [9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2,
          6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1]

# One row per country, one column per medal type.
df = pd.DataFrame(
    dict(country_name=countries, gold=gold, silver=silver, bronze=bronze)
)

# Weighted score per country: 4 points per gold, 2 per silver, 1 per bronze.
# The matrix-vector product yields one total per row.
medal_counts = df.loc[:, ['gold', 'silver', 'bronze']]
points = np.dot(medal_counts, [4, 2, 1])

# Pair every country with its score in a second DataFrame.
olympic_points = dict(country_name=countries, points=points)
olympic_points_df = pd.DataFrame(olympic_points)
#Create DataFrame from Dictionary
import pandas as pd
# Column-oriented input: each key becomes a DataFrame column and each list
# supplies that column's values, one per row.
data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers',
             'Lions', 'Lions', 'Lions'],
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12],
}
football = pd.DataFrame(data)

# Column dtypes and summary statistics of the numeric columns.
print(football.dtypes)
print(football.describe())

# First row selected by position, then by index label (both 0 here).
print(football.iloc[[0]])
print(football.loc[[0]])

# Rows at positions 3 and 4.
print(football.iloc[3:5])

# Boolean-mask filtering: seasons with more than 10 wins, then the same
# restricted to the Packers.
print(football.loc[football['wins'] > 10])
print(football.loc[(football['team'] == "Packers") & (football['wins'] > 10)])
#Create DataFrame from lists
import pandas as pd
import numpy as np
# Participating countries; the medal lists below are aligned with this list
# position by position.
countries = ['Russian Fed.', 'Norway', 'Canada', 'United States',
             'Netherlands', 'Germany', 'Switzerland', 'Belarus',
             'Austria', 'France', 'Poland', 'China', 'Korea',
             'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
             'Finland', 'Great Britain', 'Ukraine', 'Slovakia',
             'Italy', 'Latvia', 'Australia', 'Croatia', 'Kazakhstan']
gold = [13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
silver = [11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3, 7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0]
bronze = [9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2, 6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1]

# One row per country, one column per medal type.
olympic_medal_counts_df = pd.DataFrame({'country_name': countries,
                                        'gold': gold,
                                        'silver': silver,
                                        'bronze': bronze})

# Select the bronze counts (a Series) of countries that won at least one gold
# medal, then average them.
bronze_at_least_one_gold = olympic_medal_counts_df['bronze'][olympic_medal_counts_df['gold'] >= 1]
avg_bronze_at_least_one_gold = bronze_at_least_one_gold.mean()

# Series holding the average gold, silver and bronze count across all
# countries. This must read from olympic_medal_counts_df: no `df` is defined
# in this script, so the previous `df[...]` lookup raised NameError when the
# script was run on its own.
avg_medal_count = olympic_medal_counts_df[['gold', 'silver', 'bronze']].mean()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment