using LinearAlgebra

"""
    main(file_path="./ml-100k/u.data")

Load the ratings stored at `file_path`, train a matrix-factorization model on
them with [`fit`](@ref), and return the learned factor matrices `(P, Q)`.

The factor matrices are sized by the largest user id and the largest item id
found in the data, because `fit` uses those ids directly as row indices.
"""
function main(file_path="./ml-100k/u.data")
    # input data
    train_data = read_data(file_path)

    # Size by the maximum id rather than the number of distinct ids: ids are
    # used as row indices, so a gap in the id space would otherwise index past
    # the end of P or Q. `init=0` keeps empty input working (empty matrices).
    n_user = maximum(t -> t[1], train_data; init=0)
    n_item = maximum(t -> t[2], train_data; init=0)

    # parameters
    P, Q = fit(n_user, n_item, train_data)
    return P, Q
end

"""
    read_data(file_path)

Read tab-separated rating records from `file_path` and return them as a
`Vector{Tuple{Int,Int,Float64}}` of `(user, item, rating)` tuples.

Each non-blank line must contain at least three tab-separated numeric fields
(user id, item id, rating); any further fields (e.g. a timestamp) are ignored.
Blank lines are skipped.

# Throws
- `ArgumentError` if a non-blank line has fewer than three fields or a field
  is not numeric.
- `InexactError` if a user or item id is not integer-valued.
"""
function read_data(file_path)
    train_data = Tuple{Int,Int,Float64}[]
    # The do-block form closes the file even if parsing throws.
    open(file_path) do io
        for line in eachline(io)
            isempty(strip(line)) && continue
            fields = split(line, '\t')
            length(fields) >= 3 || throw(
                ArgumentError("expected at least 3 tab-separated fields, got: $(repr(line))"),
            )
            # Ids are parsed as floats first so that inputs such as "3.0",
            # which the previous implementation accepted, still load.
            u = Int(parse(Float64, fields[1]))
            i = Int(parse(Float64, fields[2]))
            r = parse(Float64, fields[3])
            push!(train_data, (u, i, r))
        end
    end
    return train_data
end

"""
    fit(n_user, n_item, train_data, n_itr=50, n_fac=5, γ=0.07, λ=0.01)

Factorize the ratings in `train_data` with stochastic gradient descent and
return `(P, Q)`, where `P` is an `n_user × n_fac` and `Q` an `n_item × n_fac`
`Float16` matrix. The predicted rating of item `i` by user `u` is
`P[u, :] ⋅ Q[i, :]`.

# Arguments
- `n_user`, `n_item`: number of rows of `P` and `Q`; every user/item id in
  `train_data` is used as a row index and must lie in `1:n_user` / `1:n_item`.
- `train_data`: iterable of `(user, item, rating)` triples.
- `n_itr`: number of passes over `train_data`.
- `n_fac`: number of latent factors.
- `γ`: learning rate.
- `λ`: L2 regularization weight.

After every pass the iteration number and the accumulated regularized squared
error are printed to standard output.
"""
function fit(n_user, n_item, train_data, n_itr=50, n_fac=5, γ=0.07, λ=0.01)
    # init parameters
    P = randn(Float16, n_user, n_fac)
    Q = randn(Float16, n_item, n_fac)

    # optimize: SGD
    for itr in 1:n_itr
        loss = 0.0
        for (u, i, r) in train_data
            # calc error (views avoid copying the two rows)
            e = r - @views(P[u, :] ⋅ Q[i, :])

            # Update one factor at a time. Both old values are read before
            # either is overwritten, so Q and P are each updated from the
            # other's pre-update value. Results are rounded back to Float16
            # when stored.
            reg = 0.0
            for f in axes(P, 2)
                pu, qi = P[u, f], Q[i, f]
                Q[i, f] = qi + γ * (e * pu - λ * qi)
                P[u, f] = pu + γ * (e * qi - λ * pu)
                reg += abs2(Float64(P[u, f])) + abs2(Float64(Q[i, f]))
            end

            # calc loss: pre-update error plus penalty on the updated rows
            loss += e * e + λ * reg
        end
        println("$itr: $loss")
    end

    return P, Q
end

# Train only when this file is executed as a script, not when it is included.
if abspath(PROGRAM_FILE) == @__FILE__
    main()
end