-
-
Save ProAek11/f5c64fcb467e47203c7cf905f2182130 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from matplotlib import pyplot | |
| import pandas as pd | |
| from IPython.core.display import display, HTML | |
# Widen the notebook container so wide DataFrames fit on screen.
_wide_container_css = "<style>.container { width:90% !important; }</style>"
display(HTML(_wide_container_css))

# Let pandas print up to 999 columns and rows before truncating the output.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 999)
# Preprocessing helper for multivariate time-series data.
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True, feat_name=None):
    """Reframe a time series as a table of lagged and future columns.

    Each output row holds the observations from ``n_in`` steps before time
    ``t`` followed by the observations at ``t`` through ``t + n_out - 1``.

    Args:
        data: Anything ``pd.DataFrame`` accepts; one row per time step and
            one column per variable. A flat list is treated as one variable.
        n_in: Number of lagged steps (t-n_in ... t-1) to include.
        n_out: Number of steps starting at ``t`` (t ... t+n_out-1) to include.
        dropnan: When true, drop the rows that contain NaN introduced by the
            shifting at both ends of the series.
        feat_name: Indexable collection of column labels, one per variable.
            When omitted, the labels ``var1 .. varN`` are used.

    Returns:
        A DataFrame whose columns are named ``<label>(t-k)``, ``<label>(t)``
        and ``<label>(t+k)``.
    """
    df = pd.DataFrame(data)
    # Count variables on the frame itself so flat lists, lists of rows,
    # 1-D arrays and 2-D arrays are all handled the same way.
    n_vars = df.shape[1]
    if feat_name is None:
        # Previously the default crashed when indexing None.
        feat_name = [f'var{j + 1}' for j in range(n_vars)]
    labels = [feat_name[j] for j in range(n_vars)]

    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [f'{label}(t-{i})' for label in labels]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        suffix = '(t)' if i == 0 else f'(t+{i})'
        names += [f'{label}{suffix}' for label in labels]

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
# Pick one stock from the list of DataFrames and drop the 'Close' column,
# because only the 'Adj. Close' column is used from here on.
stock_data[0] = stock_data[0].drop('Close', axis=1)
dataset = stock_data[0]
values = dataset.values
values = values.astype('float32')

# Min-max scale every column into [0, 1] and remember each column's min and
# max so the data can be rescaled back later.
min_dict = dict()
max_dict = dict()
for col in dataset.columns:
    col_min = dataset[col].min()
    col_max = dataset[col].max()
    min_dict[col] = col_min
    max_dict[col] = col_max
    span = col_max - col_min
    if span == 0:
        # A constant column would divide by zero and become all NaN, and the
        # NaN-dropping step in series_to_supervised would then discard every
        # row. Map it to 0.0 instead.
        dataset[col] = 0.0
    else:
        dataset[col] = (dataset[col] - col_min) / span

# Reframe with 30 lagged steps and 3 steps starting at t, then drop the
# columns at t, t+1 and t+2 that are not used.
reframed = series_to_supervised(dataset.values, 30, 3, feat_name=stock_data[0].columns)
reframed.drop(
    [
        'High(t)', 'High(t+1)', 'High(t+2)',
        'Low(t)', 'Low(t+1)', 'Low(t+2)',
        'Open(t)', 'Open(t+1)', 'Open(t+2)',
        'Volume(t)', 'Volume(t+1)', 'Volume(t+2)',
    ],
    axis=1,
    inplace=True,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment