Skip to content

Instantly share code, notes, and snippets.

@powerdude
Forked from dimi-tree/01-02.py
Created September 29, 2016 19:46
Show Gist options
  • Select an option

  • Save powerdude/8a9bfbfcd5da30a70ee8a399f8b95bad to your computer and use it in GitHub Desktop.

Select an option

Save powerdude/8a9bfbfcd5da30a70ee8a399f8b95bad to your computer and use it in GitHub Desktop.
Udacity: Machine Learning for Trading
# Working with multiple stocks
"""
SPY is used for reference - it's the market
Normalize by the first day's price to plot on "equal footing"
"""
import os
import pandas as pd
import matplotlib.pyplot as plt
def symbol_to_path(symbol, base_dir="data"):
    """Build the CSV file path for a ticker symbol.

    The result is ``base_dir`` joined with ``<symbol>.csv``; ``symbol`` is
    converted with ``str()`` first.
    """
    filename = str(symbol) + ".csv"
    return os.path.join(base_dir, filename)
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Sequence of ticker symbols. 'SPY' is placed first as the
            reference symbol when it is absent. The caller's sequence is
            left untouched.
        dates: Index for the resulting frame (e.g. a ``pd.date_range``).

    Returns:
        DataFrame indexed by ``dates`` with one column per symbol, keeping
        only the rows for which SPY has a value.
    """
    # Copy first: inserting SPY into the caller's own list would silently
    # change it for any later use.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True,
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def normalize_data(df):
    """Normalize stock prices using the first row of the dataframe.

    Every row is divided by the first row, so each column starts at 1.0 and
    the columns can be compared on an equal footing.

    Args:
        df: DataFrame of prices, one column per symbol.

    Returns:
        A new DataFrame of the same shape; ``df`` itself is not modified.
    """
    # .iloc[0] is always the first row by position. The former .ix indexer
    # was removed from pandas and was label-based on integer indexes.
    return df / df.iloc[0]
def plot_data(df, title="Stock prices"):
    """Plot the frame under the given title and show the figure.

    The x axis is labelled "Date" and the y axis "Price".
    """
    axes = df.plot(title=title, fontsize=12)
    axes.set_ylabel("Price")
    axes.set_xlabel("Date")
    plt.show()
def plot_selected(df, columns, start_index, end_index):
    """Plot the desired columns over index values in the given range.

    The whole frame is normalized by its first row before slicing, so the
    plotted values are relative to the first row of ``df``, not to the first
    row of the selected range.

    Args:
        df: DataFrame of prices.
        columns: Column labels to plot.
        start_index: First index label of the range.
        end_index: Last index label of the range.
    """
    df = normalize_data(df)
    # .loc slices by label; the former .ix indexer was removed from pandas.
    plot_data(df.loc[start_index:end_index, columns])
def test_run():
    """Load 2010 prices for a few symbols and plot SPY against IBM."""
    # SPY is not listed here; get_data() adds it as the reference symbol.
    tickers = ['GOOG', 'IBM', 'GLD']
    year_2010 = pd.date_range('2010-01-01', '2010-12-31')

    prices = get_data(tickers, year_2010)

    # Plot only the SPY and IBM columns between the two dates.
    plot_selected(prices, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')


if __name__ == "__main__":
    test_run()
# Timing Python operations
import time

# Take a timestamp before and after the code being measured; the difference
# is the elapsed wall-clock time in seconds.
t1 = time.time()
# Placeholder for the code under measurement. A single parenthesised
# argument makes this valid on both Python 2 and Python 3.
print('Execute your function')
t2 = time.time()
print('The time taken by print statement is {} seconds'.format(t2 - t1))
"""Bollinger Bands."""
import os
import pandas as pd
import matplotlib.pyplot as plt
def symbol_to_path(symbol, base_dir="data"):
    """Return the location of a ticker's CSV file inside ``base_dir``.

    The file name is the string form of ``symbol`` followed by ``.csv``.
    """
    csv_name = "%s.csv" % (str(symbol),)
    return os.path.join(base_dir, csv_name)
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Sequence of ticker symbols; 'SPY' is prepended as the
            reference symbol when missing. The sequence passed in is not
            modified.
        dates: Index for the resulting frame (e.g. a ``pd.date_range``).

    Returns:
        DataFrame indexed by ``dates`` with one column per symbol, limited to
        rows where SPY has a value.
    """
    # Build a private list so that adding SPY never alters the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True,
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def plot_data(df, title="Stock prices"):
    """Show a plot of ``df`` with ``title`` and labelled axes.

    Axis labels are fixed: "Date" on x and "Price" on y.
    """
    chart = df.plot(title=title, fontsize=12)
    for set_label, text in ((chart.set_xlabel, "Date"),
                            (chart.set_ylabel, "Price")):
        set_label(text)
    plt.show()
def get_rolling_mean(values, window):
    """Return rolling mean of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame.
        window: Number of consecutive observations in each window.

    Returns:
        Object of the same shape as ``values``; the first ``window - 1``
        entries are NaN because their window is incomplete.
    """
    # The module-level pd.rolling_mean() was removed from pandas; the
    # .rolling() accessor is its replacement.
    return values.rolling(window=window).mean()
def get_rolling_std(values, window):
    """Return rolling standard deviation of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame.
        window: Number of consecutive observations in each window.

    Returns:
        Object of the same shape as ``values``; the first ``window - 1``
        entries are NaN because their window is incomplete.
    """
    # The module-level pd.rolling_std() was removed from pandas; the
    # .rolling() accessor is its replacement.
    return values.rolling(window=window).std()
def get_bollinger_bands(rm, rstd, num_std=2):
    """Return upper and lower Bollinger Bands.

    Args:
        rm: Rolling mean.
        rstd: Rolling standard deviation.
        num_std: Distance of each band from the mean, in standard
            deviations. The default of 2 keeps the original behaviour.

    Returns:
        Tuple ``(upper_band, lower_band)``, i.e. ``rm + num_std * rstd`` and
        ``rm - num_std * rstd``.
    """
    band_offset = num_std * rstd
    upper_band = rm + band_offset
    lower_band = rm - band_offset
    return upper_band, lower_band
def test_run():
    """Plot SPY for 2012 with its rolling mean and Bollinger Bands."""
    # Load SPY prices for the whole of 2012.
    year_2012 = pd.date_range('2012-01-01', '2012-12-31')
    prices = get_data(['SPY'], year_2012)
    spy = prices['SPY']

    # Rolling statistics over a 20-row window, then the bands built from them.
    rm_SPY = get_rolling_mean(spy, window=20)
    rstd_SPY = get_rolling_std(spy, window=20)
    upper_band, lower_band = get_bollinger_bands(rm_SPY, rstd_SPY)

    # Draw the raw prices first, then overlay the other series on the same axes.
    ax = spy.plot(title="Bollinger Bands", label='SPY')
    overlays = (
        (rm_SPY, 'Rolling mean'),
        (upper_band, 'upper band'),
        (lower_band, 'lower band'),
    )
    for series, series_label in overlays:
        series.plot(label=series_label, ax=ax)

    # Axis labels and legend.
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()


if __name__ == "__main__":
    test_run()
"""
Daily returns
daily_ret[t] = (price[t]/price[t-1]) - 1
Cumulative returns
cumret[t] = (price[t]/price[0]) - 1
"""
import os
import pandas as pd
import matplotlib.pyplot as plt
def symbol_to_path(symbol, base_dir="data"):
    """Map a ticker symbol to its CSV file path.

    Joins ``base_dir`` with a file name made of ``str(symbol)`` and the
    ``.csv`` extension.
    """
    parts = [str(symbol), ".csv"]
    return os.path.join(base_dir, "".join(parts))
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Sequence of ticker symbols. When 'SPY' is absent it is
            added at the front as the reference symbol; the sequence given
            by the caller is not changed.
        dates: Index for the resulting frame (e.g. a ``pd.date_range``).

    Returns:
        DataFrame indexed by ``dates`` with one column per symbol. Rows
        where SPY has no value are dropped.
    """
    # Copy the input so that the SPY insertion below stays local.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True,
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Plot ``df`` with a caller-supplied title and axis labels, then show it."""
    axes = df.plot(title=title, fontsize=12)
    axes.set_ylabel(ylabel)
    axes.set_xlabel(xlabel)
    plt.show()
def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each value is the fractional change from the previous row, i.e.
    ``price[t] / price[t-1] - 1``. Written out by hand this would be
    ``(df / df.shift(1)) - 1``.

    Args:
        df: DataFrame (or Series) of prices.

    Returns:
        A new object of the same shape as ``df``, with the first row set
        to 0.
    """
    daily_returns = df.pct_change()
    # The first row has no predecessor, so its return cannot be calculated;
    # set it to zero. .iloc replaces the .ix indexer, which was removed from
    # pandas. Empty input is skipped because it has no first row.
    if not daily_returns.empty:
        daily_returns.iloc[0] = 0
    return daily_returns
def test_run():
    """Plot July 2012 prices for SPY and XOM, then their daily returns."""
    # One month of data only.
    july_2012 = pd.date_range('2012-07-01', '2012-07-31')
    prices = get_data(['SPY', 'XOM'], july_2012)
    plot_data(prices)

    # Daily returns, plotted with a matching title and y-axis label.
    returns = compute_daily_returns(prices)
    plot_data(returns, title="Daily returns", ylabel="Daily returns")


if __name__ == "__main__":
    test_run()
"""
Dealing with missing data:
1. Fill forward (to avoid peeking into the future)
2. Fill backward
"""
def fill_missing_values(df_data):
    """Fill missing values in data frame, in place.

    Values are filled forward first, so gaps take the last known value
    rather than a future one. Any gaps still left at the start are then
    filled backward.

    Args:
        df_data: DataFrame to fill; it is modified in place.

    Returns:
        The same ``df_data`` object, for convenience.
    """
    # ffill()/bfill() replace fillna(method=...), which pandas has deprecated.
    df_data.ffill(inplace=True)
    df_data.bfill(inplace=True)
    return df_data
"""
kurtosis (quantifies whether the shape of the data distribution matches the Gaussian distribution)
+ fat tails
- skinny tails
Scatterplots
slope (Beta): how reactive a stock is to the market - higher Beta means
the stock is more reactive to the market
NOTE: slope != correlation
correlation is a measure of how tightly the individual points fit the line
intercept (alpha): +ve --> the stock on avg is performing a little bit better
than the market
In many cases in financial research we assume the daily returns are normally distributed,
but this can be dangerous because it ignores kurtosis or the probability in the
tails.
"""
# Histogram, kurtosis, scatter-plot and correlation snippets for daily returns.
# NOTE(review): this snippet uses a module-level `df` of prices that is not
# defined here, and it needs 'SPY', 'XOM' and 'GLD' columns - confirm that
# the surrounding code provides them.
import numpy as np  # np.polyfit is used below

# Compute daily returns
daily_returns = compute_daily_returns(df)

# Plot a histogram
daily_returns.hist(bins=20)

# Get mean and standard deviation
mean = daily_returns['SPY'].mean()
std = daily_returns['SPY'].std()

# Mark the mean and one standard deviation either side of zero
plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
plt.show()

# Compute kurtosis
# (the result is not assigned or printed; it is only visible interactively)
daily_returns.kurtosis()

# Compute and plot two histograms on the same chart
daily_returns['SPY'].hist(bins=20, label='SPY')
daily_returns['XOM'].hist(bins=20, label='XOM')
plt.legend(loc='upper right')
plt.show()

# Scatterplots
# A degree-1 polyfit returns (slope, intercept), i.e. (beta, alpha).
daily_returns.plot(kind='scatter', x='SPY', y='XOM')  # SPY vs XOM
beta_XOM, alpha_XOM = np.polyfit(daily_returns['SPY'], daily_returns['XOM'], 1)  # fit poly degree 1
plt.plot(daily_returns['SPY'], beta_XOM*daily_returns['SPY'] + alpha_XOM, '-', color='r')

daily_returns.plot(kind='scatter', x='SPY', y='GLD')  # SPY vs GLD
beta_GLD, alpha_GLD = np.polyfit(daily_returns['SPY'], daily_returns['GLD'], 1)  # fit poly degree 1
plt.plot(daily_returns['SPY'], beta_GLD*daily_returns['SPY'] + alpha_GLD, '-', color='r')
# Show the scatter plots, as the histogram sections above do.
plt.show()

# Calculate correlation coefficient
# (the result is not assigned or printed; it is only visible interactively)
daily_returns.corr(method='pearson')
"""
Market capitalization for a stock: # shares outstanding * price
ETFs have 4 or 3 letters
Mutual Funds usually have 5 letters
Hedge Funds don't have abbreviations
AUM - Assets Under Management - is the total amount of money being managed by the fund.
How fund managers are rewarded:
Expense ratio
is typically a percentage of AUM; therefore, the higher the AUM value, the greater the incentive.
Two & Twenty
This structure actually motivates both AUM accumulation ("Two") as well as
Profits ("Twenty"). Here "Risk taking" is synonymous with aiming for greater
profits, which is motivated by the Two & Twenty model.
"""
Interview with Tammer Kamel
Build a strategy that is:
1) Theoretically sound
2) Empirically testable
3) Simple
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment