AlexanderMike · February 17, 2016 21:29
diff --git a/lxml_test.py b/lxml_test.py
 '''Notes from an experiment in XML using Windows 7 and Python 2.7 trying to
 follow example from:
 http://docs.python-guide.org/en/latest/scenarios/scrape/#.
 1) to get lxml on Windows machine had to install wheel file for libxml2.dll as
 pip install failed repeatedly (also Visual C++ 9.0)
 2) Then tried to parse MLB 2015 stats from this page:
 http://espn.go.com/mlb/standings/_/group/overall
 where xpath was //*[@id="main-container"]/div/section/div[2]/div/div[2]/table/tbody/tr[1]/td[2]
 however, this failed because browser xpath is not raw, and browser inserted a tbody where none existed in 
 source code. Therefore, my queries failed until I went element by element!'''
 import requests
 from lxml import html

 # Bound the request so a stalled connection cannot hang the script forever.
 page = requests.get('http://espn.go.com/mlb/standings/_/group/overall', timeout=30)
 # Fail loudly on an HTTP error instead of parsing an error page into an empty result.
 page.raise_for_status()
 # Parse the raw response bytes into an lxml HTML element tree.
 tree = html.fromstring(page.content)
 # The path deliberately has no tbody step: the notes above record that the
 # browser-reported XPath included a tbody that the raw source did not contain.
 standings = tree.xpath('//*[@id="main-container"]/div/section/div[2]/div/div[2]/table/tr[1]/td[1]/a/span/abbr/text()')
 # Result recorded by the original author: ['STL']
	'''Notes from an experiment in XML using Windows 7 and Python 2.7 trying to
	follow example from:
	http://docs.python-guide.org/en/latest/scenarios/scrape/#.
	1) to get lxml on Windows machine had to install wheel file for libxml2.dll as
	pip install failed repeatedly (also Visual C++ 9.0)
	2) Then tried to parse MLB 2015 stats from this page:
	http://espn.go.com/mlb/standings/_/group/overall
	where xpath was //*[@id="main-container"]/div/section/div[2]/div/div[2]/table/tbody/tr[1]/td[2]
	however, this failed because browser xpath is not raw, and browser inserted a tbody where none existed in
	source code. Therefore, my queries failed until I went element by element!'''
	import requests
	from lxml import html

	# Bound the request so a stalled connection cannot hang the script forever.
	page = requests.get('http://espn.go.com/mlb/standings/_/group/overall', timeout=30)
	# Fail loudly on an HTTP error instead of parsing an error page into an empty result.
	page.raise_for_status()
	# Parse the raw response bytes into an lxml HTML element tree.
	tree = html.fromstring(page.content)
	# The path deliberately has no tbody step: the notes above record that the
	# browser-reported XPath included a tbody that the raw source did not contain.
	standings = tree.xpath('//*[@id="main-container"]/div/section/div[2]/div/div[2]/table/tr[1]/td[1]/a/span/abbr/text()')
	# Result recorded by the original author: ['STL']
No results found