-
-
Save powerdude/8a9bfbfcd5da30a70ee8a399f8b95bad to your computer and use it in GitHub Desktop.
Udacity: Machine Learning for Trading
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Working with multiple stocks | |
| """ | |
| SPY is used for reference - it's the market | |
| Normalize by the first day's price to plot on "equal footing" | |
| """ | |
| import os | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
def symbol_to_path(symbol, base_dir="data"):
    """Build the path of the CSV file that holds data for a ticker symbol.

    The file name is the string form of ``symbol`` followed by ``.csv``,
    joined onto ``base_dir``.
    """
    filename = str(symbol) + ".csv"
    return os.path.join(base_dir, filename)
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Ticker symbols to load. 'SPY' is put first when it is
            absent; the caller's list is left unmodified.
        dates: Index used for the resulting frame (e.g. a pandas date range).

    Returns:
        A DataFrame indexed by ``dates`` with one column per symbol, holding
        the 'Adj Close' values read from each symbol's CSV file. Rows where
        SPY has no value are dropped.
    """
    # Work on a copy: inserting 'SPY' must not alter the list the caller owns.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def normalize_data(df):
    """Normalize stock prices using the first row of the dataframe.

    Every row is divided, column by column, by the values in the first row.
    The input frame is not modified; a new frame is returned.
    """
    # ``.ix`` has been removed from pandas; ``.iloc[0]`` picks the first row
    # by position regardless of the index type.
    return df / df.iloc[0]
def plot_data(df, title="Stock prices"):
    """Draw the frame as a chart titled ``title`` and show it.

    The x axis is labelled "Date" and the y axis "Price".
    """
    axes = df.plot(title=title, fontsize=12)
    axes.set_ylabel("Price")
    axes.set_xlabel("Date")
    plt.show()
def plot_selected(df, columns, start_index, end_index):
    """Plot the desired columns over index values in the given range.

    The whole frame is normalized first, so the plotted values are relative
    to the first row of ``df``, not to the first row of the selected range.

    Args:
        df: Frame of prices.
        columns: Column labels to plot.
        start_index: First index label of the range to plot.
        end_index: Last index label of the range to plot.
    """
    normalized = normalize_data(df)
    # ``.ix`` has been removed from pandas; ``.loc`` performs the same
    # label-based row/column selection.
    plot_data(normalized.loc[start_index:end_index, columns])
def test_run():
    """Load 2010 prices for a few tickers and plot SPY and IBM for March."""
    # SPY is not listed here; get_data() adds it.
    tickers = ['GOOG', 'IBM', 'GLD']
    period = pd.date_range('2010-01-01', '2010-12-31')

    prices = get_data(tickers, period)

    # Slice and plot
    plot_selected(prices, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')


if __name__ == "__main__":
    test_run()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Timing Python operations
import time

# Take a timestamp before and after the operation being measured; the
# difference is the elapsed time in seconds.
t1 = time.time()
# print() with parentheses runs on both Python 2 and Python 3; the bare
# print statement is a SyntaxError on Python 3.
print('Execute your function')
t2 = time.time()
print('The time taken by print statement is {} seconds'.format(t2 - t1))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Bollinger Bands.""" | |
| import os | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    The path is ``base_dir`` joined with ``<symbol>.csv``.
    """
    csv_name = "%s.csv" % (str(symbol),)
    return os.path.join(base_dir, csv_name)
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Ticker symbols to load. 'SPY' is put first when it is
            absent; the caller's list is left unmodified.
        dates: Index used for the resulting frame (e.g. a pandas date range).

    Returns:
        A DataFrame indexed by ``dates`` with one column per symbol, holding
        the 'Adj Close' values read from each symbol's CSV file. Rows where
        SPY has no value are dropped.
    """
    # Copy first so that adding the reference symbol does not change the
    # list object passed in by the caller.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def plot_data(df, title="Stock prices"):
    """Plot the frame under the given title and display the figure.

    Axis labels are fixed: "Date" on x and "Price" on y.
    """
    chart = df.plot(title=title, fontsize=12)
    chart.set_ylabel("Price")
    chart.set_xlabel("Date")
    plt.show()
def get_rolling_mean(values, window):
    """Return rolling mean of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame to average.
        window: Number of consecutive observations in each window.

    Returns:
        An object of the same type as ``values`` with the rolling mean.
    """
    # ``pd.rolling_mean`` has been removed from pandas; the ``.rolling()``
    # window API is its replacement.
    return values.rolling(window=window).mean()
def get_rolling_std(values, window):
    """Return rolling standard deviation of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame to measure.
        window: Number of consecutive observations in each window.

    Returns:
        An object of the same type as ``values`` with the rolling standard
        deviation.
    """
    # ``pd.rolling_std`` has been removed from pandas; the ``.rolling()``
    # window API is its replacement.
    return values.rolling(window=window).std()
def get_bollinger_bands(rm, rstd):
    """Compute the upper and lower Bollinger Bands.

    The bands sit two rolling standard deviations (``rstd``) above and
    below the rolling mean (``rm``). Returns ``(upper, lower)``.
    """
    spread = 2*rstd
    return rm + spread, rm - spread
def test_run():
    """Plot 2012 SPY prices with a 20-day rolling mean and Bollinger Bands."""
    # Load the data
    period = pd.date_range('2012-01-01', '2012-12-31')
    tickers = ['SPY']
    df = get_data(tickers, period)
    spy = df['SPY']

    # Rolling statistics over a 20-observation window
    rm_SPY = get_rolling_mean(spy, window=20)
    rstd_SPY = get_rolling_std(spy, window=20)

    # Bands derived from the rolling mean and standard deviation
    upper_band, lower_band = get_bollinger_bands(rm_SPY, rstd_SPY)

    # Draw the raw prices first, then overlay the mean and both bands
    # on the same axes.
    ax = spy.plot(title="Bollinger Bands", label='SPY')
    rm_SPY.plot(label='Rolling mean', ax=ax)
    upper_band.plot(label='upper band', ax=ax)
    lower_band.plot(label='lower band', ax=ax)

    # Axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()


if __name__ == "__main__":
    test_run()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Daily returns | |
| daily_ret[t] = (price[t]/price[t-1]) - 1 | |
| Cumulative returns | |
| cumret[t] = (price[t]/price[0]) - 1 | |
| """ | |
| import os | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
def symbol_to_path(symbol, base_dir="data"):
    """Locate the CSV file for a ticker symbol inside ``base_dir``.

    The file is named after the string form of ``symbol`` with a ``.csv``
    extension.
    """
    csv_file = "".join((str(symbol), ".csv"))
    return os.path.join(base_dir, csv_file)
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Ticker symbols to load. 'SPY' is put first when it is
            absent; the caller's list is left unmodified.
        dates: Index used for the resulting frame (e.g. a pandas date range).

    Returns:
        A DataFrame indexed by ``dates`` with one column per symbol, holding
        the 'Adj Close' values read from each symbol's CSV file. Rows where
        SPY has no value are dropped.
    """
    # Take a private copy so the insert below never touches the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Plot the frame with a custom title and axis labels, then show it."""
    figure_axes = df.plot(title=title, fontsize=12)
    figure_axes.set_ylabel(ylabel)
    figure_axes.set_xlabel(xlabel)
    plt.show()
def compute_daily_returns(df):
    """Compute and return the daily return values.

    The return for row ``t`` is ``(df[t] / df[t-1]) - 1``, computed with
    ``DataFrame.pct_change``; this is equivalent to ``(df / df.shift(1)) - 1``.
    The first row has no previous row, so its returns are set to zero.

    Args:
        df: Frame of prices, one column per symbol.

    Returns:
        A new frame of the same shape holding the daily returns.
    """
    daily_returns = df.pct_change()
    # Daily return values for the first date cannot be calculated. Set these
    # to zero. ``.ix`` has been removed from pandas, so select the first row
    # by position with ``.iloc``; skip the step when there are no rows.
    if len(daily_returns) > 0:
        daily_returns.iloc[0, :] = 0
    return daily_returns
def test_run():
    """Plot July 2012 prices and daily returns for SPY and XOM."""
    # Load one month of data
    tickers = ['SPY','XOM']
    month = pd.date_range('2012-07-01', '2012-07-31')
    prices = get_data(tickers, month)
    plot_data(prices)

    # Daily returns derived from the prices, shown on their own chart
    returns = compute_daily_returns(prices)
    plot_data(returns, title="Daily returns", ylabel="Daily returns")


if __name__ == "__main__":
    test_run()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Dealing with missing data: | |
| 1. Fill forward (to avoid peeking into the future) | |
| 2. Fill backward | |
| """ | |
def fill_missing_values(df_data):
    """Fill missing values in data frame, in place.

    Gaps are filled forward first, so a missing value takes the last known
    earlier value; any gaps left at the start are then filled backward.

    Args:
        df_data: Frame to fill; it is modified in place.

    Returns:
        The same ``df_data`` object, for convenience.
    """
    # ``fillna(method=...)`` is deprecated in pandas; ``ffill``/``bfill`` are
    # the dedicated replacements. The order matters: forward fill first.
    df_data.ffill(inplace=True)
    df_data.bfill(inplace=True)
    return df_data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment