Skip to content

Instantly share code, notes, and snippets.

@andreas-h
Created December 5, 2013 16:30
Show Gist options
  • Select an option

  • Save andreas-h/7808564 to your computer and use it in GitHub Desktop.

Select an option

Save andreas-h/7808564 to your computer and use it in GitHub Desktop.

Revisions

  1. andreas-h created this gist Dec 5, 2013.
    181 changes: 181 additions & 0 deletions r_stl.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,181 @@
    # -*- coding: utf-8 -*-

    import datetime

    from numpy import asarray, ceil
    import pandas
    import rpy2.robjects as robjects

    def stl(data, ns, np=None, nt=None, nl=None, isdeg=0, itdeg=1, ildeg=1,
    nsjump=None, ntjump=None, nljump=None, ni=2, no=0, fulloutput=False):
    """
    Seasonal-Trend decomposition procedure based on LOESS
    data : pandas.Series
    ns : int
    Length of the seasonal smoother.
    The value of ns should be an odd integer greater than or equal to 3.
    A value ns>6 is recommended. As ns increases the values of the
    seasonal component at a given point in the seasonal cycle (e.g., January
    values of a monthly series with a yearly cycle) become smoother.
    np : int
    Period of the seasonal component.
    For example, if the time series is monthly with a yearly cycle, then
    np=12.
    If no value is given, then the period will be determined from the
    ``data`` timeseries.
    nt : int
    Length of the trend smoother.
    The value of nt should be an odd integer greater than or equal to 3.
    A value of nt between 1.5*np and 2*np is recommended. As nt increases,
    the values of the trend component become smoother.
    If nt is None, it is estimated as the smallest odd integer greater
    or equal to ``(1.5*np)/[1-(1.5/ns)]``
    nl : int
    Length of the low-pass filter.
    The value of nl should be an odd integer greater than or equal to 3.
    The smallest odd integer greater than or equal to np is used by default.
    isdeg : int
    Degree of locally-fitted polynomial in seasonal smoothing.
    The value is 0 or 1.
    itdeg : int
    Degree of locally-fitted polynomial in trend smoothing.
    The value is 0 or 1.
    ildeg : int
    Degree of locally-fitted polynomial in low-pass smoothing.
    The value is 0 or 1.
    nsjump : int
    Skipping value for seasonal smoothing.
    The seasonal smoother skips ahead nsjump points and then linearly
    interpolates in between. The value of nsjump should be a positive
    integer; if nsjump=1, a seasonal smooth is calculated at all n points.
    To make the procedure run faster, a reasonable choice for nsjump is
    10%-20% of ns. By default, nsjump= 0.1*ns.
    ntjump : int
    Skipping value for trend smoothing. If None, ntjump= 0.1*nt
    nljump : int
    Skipping value for low-pass smoothing. If None, nljump= 0.1*nl
    ni :int
    Number of loops for updating the seasonal and trend components.
    The value of ni should be a positive integer.
    See the next argument for advice on the choice of ni.
    If ni is None, ni is set to 2 for robust fitting, to 5 otherwise.
    no : int
    Number of iterations of robust fitting. The value of no should
    be a nonnegative integer. If the data are well behaved without
    outliers, then robustness iterations are not needed. In this case
    set no=0, and set ni=2 to 5 depending on how much security
    you want that the seasonal-trend looping converges.
    If outliers are present then no=3 is a very secure value unless
    the outliers are radical, in which case no=5 or even 10 might
    be better. If no>0 then set ni to 1 or 2.
    If None, then no is set to 15 for robust fitting, to 0 otherwise.
    fulloutput : bool
    If True, a dictionary holding the full output of the original R routine
    will be returned.
    returns
    data : pandas.DataFrame
    The seasonal, trend, and remainder components
    """
    # make sure that data doesn't start or end with nan
    _data = data.copy()
    _data = _data.dropna()
    # TODO: account for non-monthly series
    _idx = pandas.DateRange(start=_data.index[0], end=_data.index[-1],
    offset=pandas.datetools.MonthBegin())
    data = pandas.Series(index=_idx)
    data[_data.index] = _data

    # zoo package contains na.approx
    zoo_ = robjects.packages.importr("zoo")

    ts_ = robjects.r['ts']
    stl_ = robjects.r['stl']
    naaction_ = robjects.r['na.approx']

    # find out the period of the time series
    if np is None:
    np = 12
    # TODO: find out the offset of the Series, and set np accordingly
    #if isinstance(data.index.offset, pandas.core.datetools.MonthEnd):
    # np = 12
    #else:
    # raise NotImplementedError()
    # fill default values
    if nt is None:
    nt = ceil((1.5 * np) / (1 - (1.5 / ns)))
    nt = nt + 1 if nt % 2 == 0 else nt

    if nl is None:
    nl = np if np % 2 is 1 else np + 1
    if nsjump is None:
    nsjump = ceil(ns / 10.)
    if ntjump is None:
    ntjump = ceil(nt / 10.)
    if nljump is None:
    nljump = ceil(nl / 10.)

    # convert data to R object
    if np is 12:
    start = robjects.IntVector([data.index[0].year, data.index[0].month])
    ts = ts_(robjects.FloatVector(asarray(data)), start=start, frequency=np)

    if nt is None:
    nt = robjects.rinterface.R_NilValue

    result = stl_(ts, ns, isdeg, nt, itdeg, nl, ildeg, nsjump, ntjump, nljump,
    False, ni, no, naaction_)

    res_ts = asarray(result[0])
    try:
    res_ts = pandas.DataFrame({"seasonal" : pandas.Series(res_ts[:,0],
    index=data.index),
    "trend" : pandas.Series(res_ts[:,1],
    index=data.index),
    "remainder" : pandas.Series(res_ts[:,2],
    index=data.index)})
    except:
    return res_ts, data
    raise
    # res_ts = pandas.DataFrame({"seasonal" : pandas.Series(index=data.index),
    # "trend" : pandas.Series(index=data.index),
    # "remainder" : pandas.Series(index=data.index)})

    if fulloutput:
    return {"time.series" : res_ts,
    "weights" : result[1],
    "call" : result[2],
    "win" : result[3],
    "deg" : result[4],
    "jump" : result[5],
    "ni" : result[6],
    "no" : result[7]}
    else:
    return res_ts


    if __name__ == "__main__":
    data = np.arange(85.) / 12.
    data = np.sin(data * (2*np.pi))
    data += np.arange(85.) / 12. * .5
    data += .1 * np.random.randn(85)
    idx = pandas.DateRange(start=datetime.datetime(1999,1,1), end=datetime.datetime(2006,2,1), offset=pandas.datetools.MonthEnd())
    data = pandas.Series(data, index=idx)

    res = stl(data, 7, nt=11)