Created
December 9, 2018 05:22
-
-
Save witchapong/6136784504524c4c88f285dac14e2fa8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from matplotlib import pyplot | |
| import pandas as pd | |
| from IPython.core.display import display, HTML | |
# Inject a CSS rule that sets `.container` elements to 90% width.
display(HTML("<style>.container { width:90% !important; }</style>"))
# Raise the pandas display limits to 999 columns and 999 rows.
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 999)
# Preprocessing helper for multivariate time-series data.
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True, feat_name=None):
    """Reframe a time series as a table of lagged and forecast columns.

    Each output row holds the observations from ``n_in`` steps before time
    ``t`` up to ``n_out - 1`` steps after it, side by side.

    Args:
        data: Observations, one row per time step. Anything accepted by
            ``pd.DataFrame`` (flat list, list of rows, 2-D array, DataFrame).
        n_in: Number of lagged steps to include, named ``(t-n_in)`` .. ``(t-1)``.
        n_out: Number of forward steps to include, named ``(t)``,
            ``(t+1)`` .. ``(t+n_out-1)``.
        dropnan: When true, drop the rows that contain NaN (the shifts leave
            NaN at the start and end of the frame).
        feat_name: Indexable collection with one name per variable. When
            omitted, the variables are named ``var1``, ``var2``, ...

    Returns:
        A ``pd.DataFrame`` whose columns are named ``<feature>(t-i)``,
        ``<feature>(t)`` and ``<feature>(t+i)``.
    """
    df = pd.DataFrame(data)
    # Count variables on the frame itself so flat lists, nested lists and
    # 1-D arrays are all sized correctly.
    n_vars = df.shape[1]
    if feat_name is None:
        feat_name = [f'var{j + 1}' for j in range(n_vars)]
    # Resolve the labels once; only the first n_vars names are used.
    labels = [feat_name[j] for j in range(n_vars)]

    cols, names = [], []
    # Input sequence (t-n, ... t-1).
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [f'{label}(t-{i})' for label in labels]
    # Forecast sequence (t, t+1, ... t+n).
    for i in range(n_out):
        cols.append(df.shift(-i))
        suffix = '(t)' if i == 0 else f'(t+{i})'
        names += [f'{label}{suffix}' for label in labels]

    # Put it all together.
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Drop rows with NaN values.
    if dropnan:
        agg.dropna(inplace=True)
    return agg
# Take the first stock from the list of DataFrames and drop the 'Close'
# column, since only 'Adj. Close' is used from here on.
stock_data[0] = stock_data[0].drop(columns='Close')
dataset = stock_data[0]
values = dataset.values.astype('float32')

# Min-max scale every column into [0, 1], keeping each column's min and max
# so the data can be rescaled back later.
min_dict = {}
max_dict = {}
for col in dataset.columns:
    col_min = dataset[col].min()
    col_max = dataset[col].max()
    min_dict[col] = col_min
    max_dict[col] = col_max
    dataset[col] = (dataset[col] - col_min) / (col_max - col_min)

# Reframe with the preprocessing helper (30 lag steps, 3 forward steps), then
# drop the forward-step columns that are not used.
reframed = series_to_supervised(dataset.values, 30, 3, feat_name=stock_data[0].columns)
unused_columns = [
    'High(t)', 'High(t+1)', 'High(t+2)',
    'Low(t)', 'Low(t+1)', 'Low(t+2)',
    'Open(t)', 'Open(t+1)', 'Open(t+2)',
    'Volume(t)', 'Volume(t+1)', 'Volume(t+2)',
]
reframed.drop(columns=unused_columns, inplace=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment