Last active
September 29, 2019 03:49
-
-
Save danielsobrado/b0866f6bba655845192919e5cd131431 to your computer and use it in GitHub Desktop.
Download all the components of a stock market index from Yahoo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import yfinance as yf | |
class MarketIndex:
    """Scraping settings for one stock market index.

    Instances are plain value holders; ``get_data`` reads the four
    attributes below to locate the component tickers on a web page.

    Args:
        url: Address of the page handed to ``pd.read_html``.
        pos: Position of the wanted table in the list ``pd.read_html``
            returns for that page.
        postfix: Text appended to every ticker read from the table
            (for example ``'.MC'``).
        column: Header of the table column that holds the tickers.
    """

    def __init__(self, url = 0, pos = 0, postfix="", column="Ticker"):
        self.url = url
        self.pos = pos
        self.postfix = postfix
        self.column = column

    def __repr__(self):
        # Show every setting so a misconfigured index is easy to spot.
        return '{}(url={!r}, pos={!r}, postfix={!r}, column={!r})'.format(
            type(self).__name__, self.url, self.pos, self.postfix, self.column
        )
def get_data(start='2008-01-01', end=None, index='syp'):
    """Download price history for every component of a market index.

    The component tickers are scraped from the Wikipedia page configured
    for ``index``, downloaded through ``yf.download`` and pickled to
    ``datasets/markets/<index>_components_data.pkl`` under the current
    working directory. That directory is not created here.

    Args:
        start: First date requested, forwarded to ``yf.download``.
        end: Last date requested, forwarded to ``yf.download``; ``None``
            leaves the choice to yfinance.
        index: Key of one of the indexes configured below
            (for example ``'syp'``, ``'nasdaq100'``, ``'dax'``).

    Returns:
        Whatever ``yf.download`` returned for the tickers, or ``None``
        when the index is not configured, the expected table or column is
        missing from the page, or the download raises ``ValueError``.
        If only the save step fails with ``OSError`` the downloaded data
        is still returned.
    """
    import os  # local import: only needed to build the output path

    # Define available Indexes to be downloaded
    indexes = {
        'ibex35': MarketIndex(r'https://es.wikipedia.org/wiki/IBEX_35', 2, '.MC'),
        'syp': MarketIndex(r'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies', 0, ''),
        'nasdaq100': MarketIndex(r'https://en.wikipedia.org/wiki/NASDAQ-100', 2, ''),
        'ftse250': MarketIndex(r'https://en.wikipedia.org/wiki/FTSE_250_Index', 1, '.L', 'Ticker[4]'),
        'ftsemib': MarketIndex(r'https://en.wikipedia.org/wiki/FTSE_MIB', 0, '.MI'),
        'mdax': MarketIndex(r'https://en.wikipedia.org/wiki/MDAX', 1, '.DE'),
        'dax': MarketIndex(r'https://en.wikipedia.org/wiki/DAX', 2, '.DE', 'Ticker symbol'),
        'eurostoxx50': MarketIndex(r'https://en.wikipedia.org/wiki/Euro_Stoxx_50', 2, ''),
        'aex': MarketIndex(r'https://en.wikipedia.org/wiki/AEX_index', 1, '.AS', 'Ticker symbol'),
        'amx': MarketIndex(r'https://en.wikipedia.org/wiki/AMX_index', 0, '.AS', 'Ticker symbol'),
        'cac40': MarketIndex(r'https://en.wikipedia.org/wiki/CAC_40', 2, '.PA'),
        'bel20': MarketIndex(r'https://en.wikipedia.org/wiki/BEL_20', 1, '.BR', 'Ticker symbol'),
    }

    market = indexes.get(index)
    if market is None:
        # Stop here: without a configuration there is no page to scrape.
        print('Index '+index+' not configured/available!')
        return None
    url = market.url
    pos = market.pos
    postfix = market.postfix
    column = market.column

    # Get the current components tickers list
    try:
        index_assets = pd.read_html(url, header=0)[pos]
    except IndexError:
        # The page has fewer tables than the configured position expects.
        print('Failed to find table at position: '+str(pos)+', try again.')
        print('Verify the url to see if there are changes: '+url)
        return None
    try:
        assets = ((index_assets[column])+postfix).tolist()
    except KeyError:
        print('Failed to find column: '+column+', try again.')
        print('Available columns: '+str(list(index_assets.columns)))
        print('Verify the url to see if there are changes: '+url)
        return None

    # Built before the download so it is always defined for the messages
    # below; os.path.join uses the separator of the running platform.
    filename = os.path.join('.', 'datasets', 'markets', index+'_components_data.pkl')

    # Download historical data to a multi-index dataframe
    try:
        # NOTE(review): as_panel is kept from the original call; confirm
        # the installed yfinance version still accepts it.
        data = yf.download(assets, start=start, end=end, as_panel=False)
    except ValueError:
        print('Failed download, try again.')
        return None

    try:
        data.to_pickle(filename)
    except OSError:
        # Saving is best-effort: the caller still gets the downloaded data.
        print('Failed saving file: '+filename+', try again.')
        return data
    print('Data saved at {}'.format(filename))
    return data
# Script entry: fetch and save the NASDAQ-100 components, using the
# default start date of get_data.
nasdaq100_data = get_data(index="nasdaq100")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment