Skip to content

Instantly share code, notes, and snippets.

@lionelyoung
Last active January 21, 2020 13:32
Show Gist options
  • Select an option

  • Save lionelyoung/041d8720e60ffe4df835c1858518dd78 to your computer and use it in GitHub Desktop.

Select an option

Save lionelyoung/041d8720e60ffe4df835c1858518dd78 to your computer and use it in GitHub Desktop.

Revisions

  1. lionelyoung revised this gist Jan 21, 2020. 1 changed file with 28 additions and 23 deletions.
    51 changes: 28 additions & 23 deletions labeling_plus.py
    Original file line number Diff line number Diff line change
    @@ -1,31 +1,37 @@
    def apply_pt_sl_on_t1_plus(close, events, molecule, pt_plus, sl_plus): # pragma: no cover
    def apply_pt_sl_on_t1(close, events, pt_sl, molecule): # pragma: no cover
    """
    Snippet 3.2, page 45, Triple Barrier Labeling Method
    This function applies the triple-barrier labeling method. It works on a set of
    datetime index values (molecule). This allows the program to parallelize the processing.
    Mainly it returns a DataFrame of timestamps regarding the time when the first barriers were reached.
    :param close: (series) close prices
    :param events: (series) of indices that signify "events" (see cusum_filter function
    for more details)
    :param pt_sl: (array) element 0, indicates the profit taking level; element 1 is stop loss level
    :param molecule: (an array) a set of datetime index values for processing
    :param pt_plus: (series) of indices that signify the profit target multiplier
    :param sl_plus: (series) of indices that signify the stop loss multiplier
    :return: DataFrame of timestamps of when first barrier was touched
    """
    # Apply stop loss/profit taking, if it takes place before t1 (end of event)
    events_ = events.loc[molecule]
    out = events_[['t1']].copy(deep=True)

    profit_taking_multiple = pt_plus.loc[molecule]
    stop_loss_multiple = sl_plus.loc[molecule]
    profit_taking_multiple = pt_sl[0]
    stop_loss_multiple = pt_sl[1]

    # Profit taking active
    profit_taking_multiple[profit_taking_multiple <= 0] = np.nan
    profit_taking = profit_taking_multiple * events_['trgt']

    if profit_taking_multiple > 0:
    profit_taking = profit_taking_multiple * events_['trgt']
    else:
    profit_taking = pd.Series(index=events.index) # NaNs

    # Stop loss active
    stop_loss_multiple[stop_loss_multiple < 0] = np.nan
    stop_loss = -stop_loss_multiple * events_['trgt']
    if stop_loss_multiple > 0:
    stop_loss = -stop_loss_multiple * events_['trgt']
    else:
    stop_loss = pd.Series(index=events.index) # NaNs

    # Get events
    for loc, vertical_barrier in events_['t1'].fillna(close.index[-1]).iteritems():
    @@ -37,20 +43,20 @@ def apply_pt_sl_on_t1_plus(close, events, molecule, pt_plus, sl_plus): # pragma
    return out


    def get_events_plus(close, t_events, pt_sl, target, min_ret, num_threads, vertical_barrier_times=False,
    pt_plus=None, sl_plus=None,
    def get_events(close, t_events, pt_sl, target, min_ret, num_threads, vertical_barrier_times=False,
    side_prediction=None):
    """
    Snippet 3.6 page 50, Getting the Time of the First Touch, with Meta Labels
    This function is orchestrator to meta-label the data, in conjunction with the Triple Barrier Method.
    :param close: (series) Close prices
    :param t_events: (series) of t_events. These are timestamps that will seed every triple barrier.
    These are the timestamps selected by the sampling procedures discussed in Chapter 2, Section 2.5.
    Eg: CUSUM Filter
    :param pt_sl: (2 element array) element 0, indicates the profit taking level; element 1 is stop loss level.
    :param pt_plus: (series) of indices that signify the profit target multiplier. A non-negative float. A 0 value means that the respective horizontal barrier will be disabled.
    :param sl_plus: (series) of indices that signify the stop loss multiplier. A non-negative float. A 0 value means that the respective horizontal barrier will be disabled.
    A non-negative float that sets the width of the two barriers. A 0 value means that the respective
    horizontal barrier (profit taking and/or stop loss) will be disabled.
    :param target: (series) of values that are used (in conjunction with pt_sl) to determine the width
    of the barrier. In this program this is daily volatility series.
    :param min_ret: (float) The minimum target return required for running a triple barrier search.
    @@ -70,8 +76,6 @@ def get_events_plus(close, t_events, pt_sl, target, min_ret, num_threads, vertic
    # 1) Get target
    target = target.loc[t_events]
    target = target[target > min_ret] # min_ret
    pt_plus_ = pt_plus.loc[t_events]
    sl_plus_ = sl_plus.loc[t_events]

    # 2) Get vertical barrier (max holding period)
    if vertical_barrier_times is False:
    @@ -80,22 +84,22 @@ def get_events_plus(close, t_events, pt_sl, target, min_ret, num_threads, vertic
    # 3) Form events object, apply stop loss on vertical barrier
    if side_prediction is None:
    side_ = pd.Series(1.0, index=target.index)
    pt_sl_ = [pt_sl[0], pt_sl[0]]
    else:
    side_ = side_prediction.loc[target.index] # Subset side_prediction on target index.

    pt_sl_ = pt_sl[:2]

    # Create a new df with [v_barrier, target, side] and drop rows that are NA in target
    events = pd.concat({'t1': vertical_barrier_times, 'trgt': target, 'side': side_}, axis=1)
    events = events.dropna(subset=['trgt'])

    # Apply Triple Barrier
    first_touch_dates = ml.labeling.mp_pandas_obj(func=apply_pt_sl_on_t1_plus,
    first_touch_dates = mp_pandas_obj(func=apply_pt_sl_on_t1,
    pd_obj=('molecule', events.index),
    num_threads=num_threads,
    close=close,
    events=events,
    pt_plus=pt_plus_,
    sl_plus=sl_plus_)
    pt_sl=pt_sl_)

    for ind in events.index:
    events.loc[ind, 't1'] = first_touch_dates.loc[ind, :].dropna().min()
    @@ -104,6 +108,7 @@ def get_events_plus(close, t_events, pt_sl, target, min_ret, num_threads, vertic
    events = events.drop('side', axis=1)

    # Add profit taking and stop loss multiples for vertical barrier calculations
    events['pt'] = pt_plus_
    events['sl'] = sl_plus_
    events['pt'] = pt_sl[0]
    events['sl'] = pt_sl[1]

    return events
  2. lionelyoung created this gist Jan 14, 2020.
    109 changes: 109 additions & 0 deletions labeling_plus.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,109 @@
    def apply_pt_sl_on_t1_plus(close, events, molecule, pt_plus, sl_plus):  # pragma: no cover
        """
        Snippet 3.2, page 45, Triple Barrier Labeling Method (per-event multiples variant)

        For every event in ``molecule`` this walks the close prices from the event start up to
        the event's vertical barrier (``t1``) and records the first timestamp at which the
        side-adjusted return path crosses the stop-loss and the profit-taking barrier.
        Working on a subset of the index (molecule) lets the caller split the work into chunks.

        :param close: (series) close prices
        :param events: (data frame) indexed by event start, with columns 't1' (vertical barrier,
            may be NaT), 'trgt' (barrier width unit) and 'side' (multiplier applied to returns)
        :param molecule: (an array) a set of index values of ``events`` to process
        :param pt_plus: (series) per-event profit target multiplier; values <= 0 disable the barrier
        :param sl_plus: (series) per-event stop loss multiplier; values <= 0 disable the barrier
        :return: DataFrame indexed like ``events.loc[molecule]`` with columns 't1', 'sl' and 'pt';
            'sl'/'pt' hold the earliest touch timestamp, or NaT when the barrier was never touched
        """
        # Apply stop loss/profit taking, if it takes place before t1 (end of event)
        events_ = events.loc[molecule]
        out = events_[['t1']].copy(deep=True)

        # Disabled barriers become NaN. ``where`` returns a new series, so the caller's
        # pt_plus / sl_plus are never modified, and integer inputs are upcast cleanly.
        profit_taking_multiple = pt_plus.loc[molecule]
        profit_taking_multiple = profit_taking_multiple.where(profit_taking_multiple > 0)
        stop_loss_multiple = sl_plus.loc[molecule]
        # A zero multiple must disable the stop loss as well; a barrier at -0.0 would
        # otherwise trigger on the very first negative return.
        stop_loss_multiple = stop_loss_multiple.where(stop_loss_multiple > 0)

        profit_taking = profit_taking_multiple * events_['trgt']
        stop_loss = -stop_loss_multiple * events_['trgt']

        # Get events: events without a vertical barrier are searched up to the last close
        sl_touches = []
        pt_touches = []
        for loc, vertical_barrier in events_['t1'].fillna(close.index[-1]).items():
            closing_prices = close.loc[loc:vertical_barrier]  # Path prices for a given trade
            cum_returns = (closing_prices / close.loc[loc] - 1) * events_.at[loc, 'side']  # Path returns
            # Comparisons against a NaN barrier are all False, so .min() of the empty index is NaT
            sl_touches.append(cum_returns[cum_returns < stop_loss[loc]].index.min())  # Earliest stop loss date
            pt_touches.append(cum_returns[cum_returns > profit_taking[loc]].index.min())  # Earliest profit taking date

        # Assign whole columns at once (same column order as before: t1, sl, pt)
        out['sl'] = sl_touches
        out['pt'] = pt_touches

        return out


    def get_events_plus(close, t_events, pt_sl, target, min_ret, num_threads, vertical_barrier_times=False,
                        pt_plus=None, sl_plus=None,
                        side_prediction=None):
        """
        Snippet 3.6 page 50, Getting the Time of the First Touch, with Meta Labels

        This function is orchestrator to meta-label the data, in conjunction with the Triple Barrier Method.
        Unlike the constant-width version, the horizontal barriers can be set per event through
        ``pt_plus`` / ``sl_plus``.

        :param close: (series) Close prices
        :param t_events: (series) of t_events. These are timestamps that will seed every triple barrier.
            These are the timestamps selected by the sampling procedures discussed in Chapter 2, Section 2.5.
            Eg: CUSUM Filter
        :param pt_sl: (2 element array) element 0, indicates the profit taking level; element 1 is stop loss level.
            Used for every event when ``pt_plus`` / ``sl_plus`` is not supplied.
        :param target: (series) of values that are used (in conjunction with the multiples) to determine
            the width of the barrier. In this program this is daily volatility series.
        :param min_ret: (float) The minimum target return required for running a triple barrier search.
        :param num_threads: (int) The number of threads concurrently used by the function.
        :param vertical_barrier_times: (series) A pandas series with the timestamps of the vertical barriers.
            We pass a False when we want to disable vertical barriers.
        :param pt_plus: (series) per-event profit target multiplier. A non-negative float. A 0 value means
            that the respective horizontal barrier will be disabled. Defaults to ``pt_sl[0]`` for all events.
        :param sl_plus: (series) per-event stop loss multiplier. A non-negative float. A 0 value means
            that the respective horizontal barrier will be disabled. Defaults to ``pt_sl[1]`` for all events.
        :param side_prediction: (series) Side of the bet (long/short) as decided by the primary model
        :return: (data frame) of events
            -events.index is event's starttime
            -events['t1'] is event's endtime
            -events['trgt'] is event's target
            -events['side'] (optional) implies the algo's position side
            -events['pt'] Profit taking multiple
            -events['sl'] Stop loss multiple
        """

        # 1) Get target
        target = target.loc[t_events]
        target = target[target > min_ret]  # min_ret

        # Per-event multiples. The parameters default to None, so fall back to the constant
        # pt_sl levels instead of failing on ``None.loc``.
        if pt_plus is None:
            pt_plus_ = pd.Series(pt_sl[0], index=t_events, dtype=float)
        else:
            pt_plus_ = pt_plus.loc[t_events]
        if sl_plus is None:
            sl_plus_ = pd.Series(pt_sl[1], index=t_events, dtype=float)
        else:
            sl_plus_ = sl_plus.loc[t_events]

        # 2) Get vertical barrier (max holding period)
        if vertical_barrier_times is False:
            vertical_barrier_times = pd.Series(pd.NaT, index=t_events)

        # 3) Form events object, apply stop loss on vertical barrier
        if side_prediction is None:
            # No primary model: treat every event as a long position
            side_ = pd.Series(1.0, index=target.index)
        else:
            side_ = side_prediction.loc[target.index]  # Subset side_prediction on target index.

        # Create a new df with [v_barrier, target, side] and drop rows that are NA in target
        events = pd.concat({'t1': vertical_barrier_times, 'trgt': target, 'side': side_}, axis=1)
        events = events.dropna(subset=['trgt'])

        # Apply Triple Barrier
        first_touch_dates = ml.labeling.mp_pandas_obj(func=apply_pt_sl_on_t1_plus,
                                                      pd_obj=('molecule', events.index),
                                                      num_threads=num_threads,
                                                      close=close,
                                                      events=events,
                                                      pt_plus=pt_plus_,
                                                      sl_plus=sl_plus_)

        # The event ends at whichever barrier (vertical, stop loss, profit taking) came first
        for ind in events.index:
            events.loc[ind, 't1'] = first_touch_dates.loc[ind, :].dropna().min()

        # The synthetic all-long side is an implementation detail; only expose a real prediction
        if side_prediction is None:
            events = events.drop('side', axis=1)

        # Add profit taking and stop loss multiples for vertical barrier calculations
        # (assignment aligns on index, so only the rows kept in events are filled)
        events['pt'] = pt_plus_
        events['sl'] = sl_plus_
        return events