dgaitsgo · August 6, 2019 10:19
diff --git a/get_cdc_measles_data.py b/get_cdc_measles_data.py
 # coding: utf-8

 # In[30]:


 import os
 import json
 from contextlib import closing
 from bs4 import BeautifulSoup
 import csv


 # In[7]:


 from requests import get
 from requests.exceptions import RequestException
 from contextlib import closing

 def simple_get(url, timeout=30):
    """
    Fetch `url` with an HTTP GET request and return the raw response body.

    The body is returned as bytes (`resp.content`) when `is_good_response`
    accepts the response; otherwise None is returned.  A `RequestException`
    raised while making the request is reported through `log_error` and
    also results in None.

    `timeout` is the number of seconds handed to `requests.get`; without a
    timeout requests waits indefinitely for an unresponsive server.
    """
    try:
        # closing() releases the streamed connection even when the response
        # is rejected or reading the body fails.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None

    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None


 def is_good_response(resp):
    """
    Return True if the response looks like a successful HTML page.

    A response is accepted when its status code is 200 and its
    Content-Type header contains 'html' (compared case-insensitively).
    A response without a Content-Type header is rejected instead of
    raising KeyError.
    """
    # Use .get() rather than indexing: the header is optional, and the None
    # check has to happen before .lower() is called on the value.
    content_type = resp.headers.get('Content-Type')
    if resp.status_code != 200 or content_type is None:
        return False
    return 'html' in content_type.lower()

 def log_error(e):
    """Report the error `e` by printing its string form."""
    print(str(e))


 # In[8]:


 # Download the weekly table; the year and week select which published
 # table the URL below points at.
 currWeek="30"
 currYear="2019"
 latestLink = f'https://wonder.cdc.gov/nndss/static/{currYear}/{currWeek}/{currYear}-{currWeek}-table1v.html'
 raw = simple_get(latestLink)
 if raw is None:
    # simple_get returns None on request errors and on rejected responses;
    # stop here with the URL rather than failing later inside the parser.
    raise RuntimeError(f'Could not download {latestLink}')


 # In[10]:


 # Parse the downloaded HTML into a tree.
 tree = BeautifulSoup(raw, 'html.parser')


 # In[52]:


 # Extract one CSV line per table row and export the result.
 csvHeader = ["reporting_area", "cum_ytd_indigenous", "cum_ytd_imported"]
 rows = tree.find_all('tr')

 with open('state-cum-measles.csv', 'w', newline='', encoding='utf-8') as csvfile:
    data = csv.writer(csvfile)
    data.writerow(csvHeader)
    # The first three <tr> elements are skipped; presumably they are the
    # table's header rows -- confirm against the page markup.
    for curr_row in rows[3:]:
        area_cell = curr_row.find('th')
        indigenous_cell = curr_row.select_one('td[headers*=SH2-3]')
        imported_cell = curr_row.select_one('td[headers*=SH3-3]')
        if area_cell is None or indigenous_cell is None or imported_cell is None:
            # A row without the expected cells is not a data row; skip it
            # instead of crashing on a None / empty selection.
            continue

        reporting_area = area_cell.text
        # The table prints '-' where there is no count; export that as '0'.
        cum_ytd_indigenous = '0' if indigenous_cell.text == '-' else indigenous_cell.text
        cum_ytd_imported = '0' if imported_cell.text == '-' else imported_cell.text

        csvRow = [
            reporting_area,
            cum_ytd_indigenous,
            cum_ytd_imported,
        ]
        data.writerow(csvRow)
	# coding: utf-8

	# In[30]:


	import os
	import json
	from contextlib import closing
	from bs4 import BeautifulSoup
	import csv


	# In[7]:


	from requests import get
	from requests.exceptions import RequestException
	from contextlib import closing

	def simple_get(url, timeout=30):
	    """
	    Fetch `url` with an HTTP GET request and return the raw response body.

	    The body is returned as bytes (`resp.content`) when `is_good_response`
	    accepts the response; otherwise None is returned.  A `RequestException`
	    raised while making the request is reported through `log_error` and
	    also results in None.

	    `timeout` is the number of seconds handed to `requests.get`; without a
	    timeout requests waits indefinitely for an unresponsive server.
	    """
	    try:
	        # closing() releases the streamed connection even when the
	        # response is rejected or reading the body fails.
	        with closing(get(url, stream=True, timeout=timeout)) as resp:
	            if is_good_response(resp):
	                return resp.content
	            return None

	    except RequestException as e:
	        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
	        return None


	def is_good_response(resp):
	    """
	    Return True if the response looks like a successful HTML page.

	    A response is accepted when its status code is 200 and its
	    Content-Type header contains 'html' (compared case-insensitively).
	    A response without a Content-Type header is rejected instead of
	    raising KeyError.
	    """
	    # Use .get() rather than indexing: the header is optional, and the
	    # None check has to happen before .lower() is called on the value.
	    content_type = resp.headers.get('Content-Type')
	    if resp.status_code != 200 or content_type is None:
	        return False
	    return 'html' in content_type.lower()

	def log_error(e):
	    """Report the error `e` by printing it."""
	    print(e)


	# In[8]:


	# Download the weekly table; the year and week select which published
	# table the URL below points at.
	currWeek="30"
	currYear="2019"
	latestLink = f'https://wonder.cdc.gov/nndss/static/{currYear}/{currWeek}/{currYear}-{currWeek}-table1v.html'
	raw = simple_get(latestLink)
	if raw is None:
	    # simple_get returns None on request errors and on rejected responses;
	    # stop here with the URL rather than failing later inside the parser.
	    raise RuntimeError(f'Could not download {latestLink}')


	# In[10]:


	# Parse the downloaded HTML into a tree.
	tree = BeautifulSoup(raw, 'html.parser')


	# In[52]:


	# Extract one CSV line per table row and export the result.
	csvHeader = ["reporting_area", "cum_ytd_indigenous", "cum_ytd_imported"]
	rows = tree.find_all('tr')

	with open('state-cum-measles.csv', 'w', newline='', encoding='utf-8') as csvfile:
	    data = csv.writer(csvfile)
	    data.writerow(csvHeader)
	    # The first three <tr> elements are skipped; presumably they are the
	    # table's header rows -- confirm against the page markup.
	    for curr_row in rows[3:]:
	        area_cell = curr_row.find('th')
	        indigenous_cell = curr_row.select_one('td[headers*=SH2-3]')
	        imported_cell = curr_row.select_one('td[headers*=SH3-3]')
	        if area_cell is None or indigenous_cell is None or imported_cell is None:
	            # A row without the expected cells is not a data row; skip it
	            # instead of crashing on a None / empty selection.
	            continue

	        reporting_area = area_cell.text
	        # The table prints '-' where there is no count; export that as '0'.
	        cum_ytd_indigenous = '0' if indigenous_cell.text == '-' else indigenous_cell.text
	        cum_ytd_imported = '0' if imported_cell.text == '-' else imported_cell.text

	        csvRow = [
	            reporting_area,
	            cum_ytd_indigenous,
	            cum_ytd_imported,
	        ]
	        data.writerow(csvRow)
No results found