This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| from torch import nn | |
| import math | |
| class PositionalEncoding(nn.Module): | |
| "Implement the PE function." | |
| def __init__(self, d_model, max_len=5000): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| d_model=512 | |
| def positional_embedding(pos,d_model=512): | |
| dpow=np.array([2*i/d_model for i in range(d_model//2)]) # takes care of 10k^ 2*i/d_model | |
| # defining the sin/cos denom | |
| sindenom=np.power(np.repeat(10000,d_model//2),dpow) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import treelite | |
| import treelite_runtime # runtime module | |
| import numpy as np | |
| import time | |
| dim = 100 | |
| toolchain = 'gcc' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import treelite | |
| import treelite_runtime # runtime module | |
| import numpy as np | |
| import time | |
| dim = 100 | |
| toolchain = 'gcc' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import xgboost as xgb | |
| import numpy as np | |
| import random | |
| def xgb_data(training_rows=1000000,dim=100): | |
| """ | |
| Create training/validation data for xgboost |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import lightgbm as lgb | |
| import numpy as np | |
| import random | |
| from collections import Counter | |
| def lgb_data(training_rows=1000000,dim=100): | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the xDeepFM model: a linear part, a DNN part and a CIN part over
# the same feature columns, with light L2 regularisation on the linear
# weights and embeddings.
model = xDeepFM(
    linear_feature_columns,
    dnn_feature_columns,
    dnn_hidden_units=(256, 256),
    cin_layer_size=(128, 128),
    cin_split_half=True,
    cin_activation='relu',
    l2_reg_linear=1e-05,
    l2_reg_embedding=1e-05,
    l2_reg_dnn=0,
    l2_reg_cin=0,
    init_std=0.0001,
    seed=1024,
    dnn_dropout=0,
    dnn_activation='relu',
    dnn_use_bn=False,
    task='binary',
)
# compiling the model: Adam optimizer, log-loss objective, and the same
# log-loss tracked as the reported metric
model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])
# training the model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# features to be used for dnn part of xdeepfm (sparse + dense columns)
dnn_feature_columns = sparse_feature_columns + dense_feature_columns
# features to be used for linear part of xdeepfm — same column set as the
# DNN side in this setup
linear_feature_columns = sparse_feature_columns + dense_feature_columns
# raw input column names extracted from the combined feature columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
# creating train test splits (80/20; no random_state is passed, so the
# split differs between runs — NOTE(review): pin it for reproducibility)
train, test = train_test_split(data, test_size=0.2)
# model inputs: one numpy array per feature name, keyed by column name
train_model_input = {name:train[name].values for name in feature_names}
test_model_input = {name:test[name].values for name in feature_names}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def encoding(data, feat, encoder):
    """Fit *encoder* on column *feat* of *data* and overwrite that column.

    Mutates *data* in place; returns None. The encoder's fit_transform is
    called on the full column, so each call fits a fresh encoder.
    """
    transformed = encoder.fit_transform(data[feat])
    data[feat] = transformed
# Label-encode every categorical (sparse) feature in place. The original
# used a list comprehension purely for its side effects, materialising a
# throwaway list of Nones; a plain for-loop states the intent directly.
for feat in sparse_features:
    encoding(data, feat, LabelEncoder())
# Using normalization for dense features: rescale each dense column to
# the [0, 1] range. NOTE(review): fitted on the full dataset before the
# train/test split — confirm this leakage is acceptable for the demo.
mms = MinMaxScaler(feature_range=(0,1))
data[dense_features] = mms.fit_transform(data[dense_features])
| # creating a 4 bit embedding for every sparse feature | |
| sparse_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4) \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# reading criteo_sample data (Criteo display-ads sample, expected in the
# current working directory; despite the .txt extension it is CSV-parsed)
data = pd.read_csv('./criteo_sample.txt')
# Split the Criteo columns into the two conventional groups:
# C1..C26 are the categorical (sparse) features, I1..I13 the numeric
# (dense) features.
sparse_features = [f"C{i}" for i in range(1, 27)]
dense_features = [f"I{i}" for i in range(1, 14)]
# data imputation for missing values: categorical NaNs become the string
# '-1' (treated as just another category), numeric NaNs become 0
data[sparse_features] = data[sparse_features].fillna('-1', )
data[dense_features] = data[dense_features].fillna(0,)
# creating target variable — presumably the binary click label; verify
# the column name against the dataset header
target = ['label']
Newer / Older