Skip to content

Instantly share code, notes, and snippets.

@kboghe
kboghe / aggregating_googleplace_cat.csv
Created September 28, 2020 10:01
aggregating_googleplace_cat
index category cat
Supermarkt 4087 supermarket
Restaurant 2838 restaurant
Kruidenier 595 supermarket
Italiaans restaurant 385 restaurant
Snackbar 233 restaurant
no category available 227 no category
Frans restaurant 218 restaurant
Pizzeria 215 restaurant
Discountsupermarkt 191 supermarket
@kboghe
kboghe / peakpopularityhour.py
Created September 28, 2020 08:33
peakpopularityhour
############################################
# average restaurant popularity, per hour  #
############################################
# Mean 'percentage busy' of the restaurant rows for every
# (country, day, hour) combination.
byhour_pop = (
    geos_locinfo_poptimes
    .loc[geos_locinfo_poptimes['category_aggregated'].eq('restaurant')]
    .groupby(['country', 'day list', 'hour list'])['percentage busy']
    .agg(['mean'])
    .reset_index()
)
# Drop rows whose 'hour list' entry is a status message instead of an hour.
byhour_pop = byhour_pop.loc[
    ~byhour_pop['hour list'].isin([
        'day marked as closed',
        'not enough location data available for this day',
    ])
]
# Wide layout: one row per (country, day), one column per hour.
byhour_pop = byhour_pop.pivot(
    index=['country', 'day list'],
    columns='hour list',
    values='mean',
)
# Hour labels '0'..'23' as strings; night = '0','1', evening = '17'..'23'.
hours_range = list(map(str, range(24)))
hours_range_night = hours_range[:2]
hours_range_evening = hours_range[17:]
# Evening hours first, followed by the hours just after midnight.
hours_evening_night = hours_range_evening + hours_range_night
@kboghe
kboghe / peakpopularityplot.py
Created September 28, 2020 08:11
peakpopularityplot
# Prepare one long data frame for plotting restaurant popularity, combining
# the "overall day popularity" summary with the "peak hour" summary.
sns.set_theme(style="darkgrid")
# presumably both entries of dict_pop are pandas DataFrames - verify against
# the code that builds dict_pop.
df_res_ov = dict_pop['country_restaurant_overall']
df_res_peak = dict_pop['country_restaurant_peak']
# Label every row with its place type and summary method. A scalar is
# broadcast to each frame's own length; the original built the list for
# df_res_peak['type'] from len(df_res_ov), which fails when the two frames
# differ in length.
df_res_ov['type'] = 'restaurant'
df_res_peak['type'] = 'restaurant'
df_res_ov['method'] = 'by overall day popularity'
df_res_peak['method'] = 'by peak hour'
df = pd.concat([df_res_ov, df_res_peak])
# Recode day names as digit strings. Assign the result back instead of calling
# replace(..., inplace=True) on the selected column: that chained in-place
# update does not reliably modify df (pandas Copy-on-Write).
df["day list"] = df["day list"].replace({
    "monday": "0",
    "tuesday": "1",
    "wednesday": "2",
    "thursday": "3",
    "friday": "4",
    "saturday": "5",
    "sunday": "6",
})
# Extra column holding a digit string per country code.
df['country_order'] = df["country"].replace({
    "NL": "0",
    "BE": "1",
    "DE": "2",
    "FR": "3",
    "ES": "4",
    "IT": "5",
})
@kboghe
kboghe / summarizedata.py
Last active September 28, 2020 07:54
Summarize data
###################################################
#### 1. create two functions to summarize data ####
###################################################
## 1.1 most popular day by hour and overall popularity + number of locations per country + number of open places per country ##
###############################################################################################################################
def places_available(area=None,placetype=None,method=None):
if any(parameter is None for parameter in [method,area,placetype]) :
Exception("Please provide a value for the area, input and placetype parameters")
if method == 'peak':
@kboghe
kboghe / hierarchicalclus.py
Last active September 24, 2020 14:01
hierarchicalclus
from sklearn.cluster import AgglomerativeClustering
from tslearn.clustering import TimeSeriesKMeans
import pandas as pd
#writing function to summarize cluster membership across regions
def cluster_membership(cases=None,cluster=None):
url_clus = pd.DataFrame({'url':cases,'cluster':cluster})
url_clus = pd.merge(url_clus,geos_locinfo_poptimes[['url','country']].drop_duplicates(),how='left')
url_clus['index'] = 1
url_clus = url_clus.pivot(index=['url','cluster'], columns='country', values='index')
@kboghe
kboghe / plottingparis.py
Last active September 24, 2020 12:08
plotting paris
import pandas as pd
import geopandas
# Draw the Paris map on a single, very large figure with the axes hidden.
f, axs = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(90, 135),
    sharex=False,
    sharey=False,
)
loc = ['Paris_map.geojson']  # area to plot
fileloc = f"maps/{loc[0]}"  # all map files live in the maps/ folder
# NOTE: the name `map` shadows the builtin; kept because it is a script-level
# name that later code may refer to.
map = geopandas.read_file(fileloc)  # load the map geometry
map.plot(ax=axs, linewidth=0.1)
axs.axis('off')
@kboghe
kboghe / osmcitymaps.py
Last active September 24, 2020 10:59
Retrieving city maps osm
import json
import numpy as np
import geopandas
import osmnx
# Preparing the retrieval of city shape maps: collect the distinct search
# inputs (e.g. "Paris", "Nice", ...) and swap the two names that need a
# different spelling for the lookup.
locations = [
    'Hamburg,Hamburg,Germany' if loc == 'Hamburg'
    else 'Saragossa' if loc == 'Zaragozza'
    else loc
    for loc in geos['search input'].unique()
]
@kboghe
kboghe / positionstrack.py
Last active September 24, 2020 08:53
Performing Position stack requests in Python
import re
import pandas as pd
import http.client, urllib.parse
import random
import time
import json
import numpy as np
import math
# Distinct (id, address, country) combinations taken from locinfo
# (intended as one row per location). reset_index() is called without
# drop=True, so the previous index is kept as a regular column.
addr_city = (
    locinfo[['id', 'address', 'country']]
    .drop_duplicates()
    .reset_index()
)
@kboghe
kboghe / forcatchvpnloop.py
Last active August 11, 2020 15:48
for-catch-vpn-loop
# Look up the current public IP address via ident.me, making at most two
# attempts and waiting 10 seconds after a failed one.
# NOTE: if both attempts fail, current_ip and new_ip are never assigned.
for i in range(2):
    try:
        # The context manager closes the HTTP response even when read() or
        # decode() raises; the original left the response object unclosed.
        with urllib.request.urlopen('https://ident.me') as response:
            current_ip = new_ip = response.read().decode('utf8')
    except urllib.error.URLError:
        print("Can't fetch current ip. Retrying...")
        time.sleep(10)
        continue
    else:
        # Success: report the address and stop retrying.
        print("\nYour current ip-address is:", current_ip)
        break
@kboghe
kboghe / forcatchloop.py
Last active October 6, 2022 17:33
for-catch loop
import requests
from bs4 import BeautifulSoup
import random
# Request headers that imitate a desktop Chrome browser (see User-Agent);
# 'Connection: close' is sent with every request.
headers_browser = {
    "Connection": "close",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
    "Accept-Encoding": "gzip",
    "Accept-Language": "en-US,en;q=0.9,es;q=0.8",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
}
for i in range(3): # loop the try-part (i.e. opening the link) until it works, but only try it 3 times at most#
try: #try the following:#
random_sleep_link = random.uniform(10, 15) #sleep for a random chosen amount of seconds between 10 and 15 seconds#