Skip to content

Instantly share code, notes, and snippets.

import torch
from torch import nn
import math
class PositionalEncoding(nn.Module):
"Implement the PE function."
def __init__(self, d_model, max_len=5000):
@abhi1868sharma
abhi1868sharma / positional_encoding_numpy.py
Last active November 28, 2021 12:57
This gist creates a positional encoding of shape (x.size(1), 512).
import numpy as np
d_model=512
def positional_embedding(pos, d_model=512):
    """Return the sinusoidal positional encoding for token position `pos`.

    PE[2i]   = sin(pos / 10000**(2i / d_model))
    PE[2i+1] = cos(pos / 10000**(2i / d_model))

    Args:
        pos: token position (scalar; integer or float).
        d_model: embedding dimension, assumed even. Defaults to 512.

    Returns:
        np.ndarray of shape (d_model,) with sin values at even indices
        and cos values at odd indices.
    """
    # Exponents 2i/d_model for i in 0..d_model//2-1 — the 10000**(2i/d) term.
    dpow = np.array([2 * i / d_model for i in range(d_model // 2)])
    # Shared sin/cos denominator: 10000 ** (2i / d_model).
    sindenom = np.power(np.repeat(10000, d_model // 2), dpow)
    # Bug fix: the original computed the denominators and fell off the end,
    # returning None. Interleave sin/cos into the final encoding vector.
    pe = np.empty(d_model)
    pe[0::2] = np.sin(pos / sindenom)
    pe[1::2] = np.cos(pos / sindenom)
    return pe
import treelite
import treelite_runtime # runtime module
import numpy as np
import time
dim = 100
toolchain = 'gcc'
import treelite
import treelite_runtime # runtime module
import numpy as np
import time
dim = 100
toolchain = 'gcc'
import xgboost as xgb
import numpy as np
import random
def xgb_data(training_rows=1000000,dim=100):
"""
Create training/validation data for xgboost
import lightgbm as lgb
import numpy as np
import random
from collections import Counter
def lgb_data(training_rows=1000000,dim=100):
# Build the xDeepFM model: linear part + CIN + plain DNN over the same features.
# (Parenthesized call instead of backslash continuations.)
model = xDeepFM(
    linear_feature_columns,
    dnn_feature_columns,
    dnn_hidden_units=(256, 256),
    cin_layer_size=(128, 128),
    cin_split_half=True,
    cin_activation='relu',
    l2_reg_linear=1e-05,
    l2_reg_embedding=1e-05,
    l2_reg_dnn=0,
    l2_reg_cin=0,
    init_std=0.0001,
    seed=1024,
    dnn_dropout=0,
    dnn_activation='relu',
    dnn_use_bn=False,
    task='binary',
)
# compiling the model
model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])
# training the model
# training the model
# features to be used for dnn part of xdeepfm
dnn_feature_columns = sparse_feature_columns + dense_feature_columns
# features to be used for linear part of xdeepfm
linear_feature_columns = sparse_feature_columns + dense_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
# creating train test splits
train, test = train_test_split(data, test_size=0.2)
train_model_input = {name:train[name].values for name in feature_names}
test_model_input = {name:test[name].values for name in feature_names}
# encoding function
def encoding(data, feat, encoder):
    """Fit `encoder` on column `feat` of `data` and overwrite it in place."""
    transformed = encoder.fit_transform(data[feat])
    data[feat] = transformed
# encoding for categorical features: label-encode each sparse column in place.
# (Fixed: was a list comprehension used only for its side effects, which
# builds and discards a throwaway list of Nones — a plain loop states intent.)
for feat in sparse_features:
    encoding(data, feat, LabelEncoder())
# Using normalization for dense feature: scale dense columns into [0, 1].
mms = MinMaxScaler(feature_range=(0, 1))
data[dense_features] = mms.fit_transform(data[dense_features])
# creating a 4 bit embedding for every sparse feature
sparse_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4) \
# reading criteo_sample data
data = pd.read_csv('./criteo_sample.txt')
# Criteo columns: C1..C26 are categorical (sparse), I1..I13 are numeric (dense).
sparse_features = ['C' + str(idx) for idx in range(1, 27)]
dense_features = ['I' + str(idx) for idx in range(1, 14)]
# data imputation for missing values: '-1' for categoricals, 0 for numerics
data[sparse_features] = data[sparse_features].fillna('-1')
data[dense_features] = data[dense_features].fillna(0)
# creating target variable
target = ['label']