# URL for the Washington Wizards Basketball Reference page wiz_url = (f'https://www.basketball-reference.com/teams/WAS/2021.html') # The requests library can send a GET request to the wiz_url wiz_res = requests.get(wiz_url) # BeautifulSoup library parses the content of an HTML document, in this case wiz_res wiz_soup = BeautifulSoup(wiz_res.content, 'lxml') # BeautifulSoup's .find() method searches for a tag and specified attributes, # returning the first match wiz_per_game = wiz_soup.find(name = 'table', attrs = {'id' : 'per_game'}) # Making a list of dictionaries to then convert into a pd.DataFrame wiz_info = [] for row in wiz_per_game.find_all('tr')[1:]: # Excluding the first 'tr', since that's the table's title head player = {} player['Name'] = row.find('a').text.strip() player['Age'] = row.find('td', {'data-stat' : 'age'}).text player['Min PG'] = row.find('td', {'data-stat' : 'mp_per_g'}).text player['Field Goal %'] = row.find('td', {'data-stat' : 'fg_pct'}).text player['Rebounds PG'] = row.find('td', {'data-stat' : 'trb_per_g'}).text player['Assists PG'] = row.find('td', {'data-stat' : 'ast_per_g'}).text player['Steals PG'] = row.find('td', {'data-stat' : 'stl_per_g'}).text player['Blocks PG'] = row.find('td', {'data-stat' : 'blk_per_g'}).text player['Turnovers PG'] = row.find('td', {'data-stat' : 'tov_per_g'}).text player['Points PG'] = row.find('td', {'data-stat' : 'pts_per_g'}).text player_url = ('https://www.basketball-reference.com/' + row.find('a').attrs['href']) player_rest = requests.get(player_url) player_soup = BeautifulSoup(player_rest.content, 'lxml') player_info = player_soup.find(name = 'div', attrs = {'itemtype' : 'https://schema.org/Person'}) player_links= [] for link in player_info.find_all('a'): player_links.append(link.get('href')) if 'twitter' in player_links[1]: player['Twitter Handle'] = player_links[1].replace('https://twitter.com/', '') else: player['Twitter Handle'] = 'Not Listed' s = str(player_info.find_all('p')) weight = re.search('\"weight\">(.*)lb', s) position = re.search('Position:\n \n (.*)\n\n', s) height = re.search('\"height\">(.*),\xa0