Last active
March 12, 2024 11:44
-
-
Save inkrement/497c3be06f002be6dd1a842a4475f2dc to your computer and use it in GitHub Desktop.
Revisions
-
inkrement revised this gist
Aug 8, 2019 . 1 changed file with 10 additions and 3 deletions. There are no files selected for viewing
# Gist revision of Aug 8, 2019 (diff hunk @@ -23,14 +23,21 @@).
# The hunk's leading context (`embeddings_env` followed by `}`) is the tail of
# load_embedding(), which this hunk does not show in full.

# Cosine similarity of two unit vectors.
#
# Args:
#   a, b: numeric vectors of equal length, assumed to be unit length already.
#
# Returns:
#   The dot product of `a` and `b` as computed by `%*%` (a 1 x 1 matrix);
#   for unit vectors this equals the cosine of the angle between them.
cosine_similarity <- function(a, b) {
  # assuming unit vectors, the cosine is just the dot product
  a %*% b
}

# Find the entries of `embeddings` that are most similar to `ref_item`.
#
# Args:
#   embeddings: environment mapping labels to unit vectors.
#   ref_item:   unit vector to compare every entry against.
#   n_top:      maximum number of entries to return (default 10).
#
# Returns:
#   A named list of similarities, highest first, holding at most `n_top`
#   entries. Entries whose similarity is (numerically) 1 are left out, which
#   drops the reference item itself.
most_similar <- function(embeddings, ref_item, n_top = 10) {
  # similarity of every stored vector to ref_item, as a named numeric vector
  cos_sims <- vapply(
    as.list(embeddings),
    function(item) as.numeric(cosine_similarity(item, ref_item)),
    numeric(1)
  )

  # The dot product of a unit vector with itself is only approximately 1 in
  # floating point, so compare against 1 with a small tolerance rather than
  # exactly. Undefined similarities (NA / NaN) are dropped as well.
  tolerance <- sqrt(.Machine$double.eps)
  keep <- !is.na(cos_sims) & cos_sims < 1 - tolerance
  cos_sims <- cos_sims[keep]

  # highest similarity first; never index past the end, so the result is not
  # padded with empty entries when fewer than n_top candidates remain
  ranked <- cos_sims[order(cos_sims, decreasing = TRUE)]
  as.list(ranked[seq_len(min(n_top, length(ranked)))])
}
inkrement revised this gist
Aug 7, 2019 . No changes. There are no files selected for viewing
-
inkrement revised this gist
Aug 7, 2019 . 1 changed file with 7 additions and 7 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,7 +5,7 @@ load_embedding <- function(file_path){ # create new environment embeddings_env <- new.env(hash = TRUE, parent = emptyenv()) # this function is used to convert vectors to unit vectors # by dividing their components by vector length normalize_vector <- function(a){ @@ -14,10 +14,10 @@ load_embedding <- function(file_path){ # iterate through the whole file line by line for (i in 1:length(lines)) { line <- lines[[i]] values <- strsplit(line, " ")[[1]] label <- values[[1]] embeddings_env[[label]] <- normalize_vector(as.double(values[-1])) } embeddings_env @@ -26,11 +26,11 @@ load_embedding <- function(file_path){ most_similar <- function(embeddings, ref_item, n_top = 10){ # calculate cos similarity to ref_item for all elements cos_sims <- eapply(embeddings, cos_sim, b = ref_item) # only look at cos values smaller than 1 # this will ignore the same element cos_sims <- cos_sims[cos_sims < 1] # return top elements cos_sims[order(unlist(cos_sims),decreasing=TRUE)][1:n_top] } -
inkrement revised this gist
Aug 7, 2019 . No changes. There are no files selected for viewing
-
inkrement created this gist
Aug 7, 2019. There are no files selected for viewing
# Initial gist revision of Aug 7, 2019 (diff hunk @@ -0,0 +1,36 @@).

# Load a text embedding file into an environment of unit vectors.
#
# Every non-blank line holds a label followed by the numeric components of
# its vector, separated by spaces or tabs.
#
# Args:
#   file_path: file to read; passed straight to `readLines()`.
#
# Returns:
#   An environment mapping each label to its vector scaled to unit length.
load_embedding <- function(file_path) {
  # load full file
  lines <- readLines(file_path)

  # environment used as the label -> vector map
  embeddings_env <- new.env(hash = TRUE, parent = emptyenv())

  # convert a vector to a unit vector by dividing its components by the
  # vector length
  normalize_vector <- function(a) {
    a / sqrt(sum(a^2))
  }

  # Iterate over the lines themselves: with `1:length(lines)` an empty file
  # would count 1, 0 and fail on `lines[[1]]`.
  for (line in lines) {
    values <- strsplit(trimws(line), "[ \t]+")[[1]]
    # skip blank lines and lines that carry a label but no components
    if (length(values) < 2L) {
      next
    }
    label <- values[[1]]
    embeddings_env[[label]] <- normalize_vector(as.double(values[-1]))
  }

  embeddings_env
}

# Find the entries of `embeddings` that are most similar to `ref_item`.
#
# Args:
#   embeddings: environment mapping labels to unit vectors, as returned by
#               `load_embedding()`.
#   ref_item:   unit vector to compare every entry against.
#   n_top:      maximum number of entries to return (default 10).
#
# Returns:
#   A named list of similarities, highest first, holding at most `n_top`
#   entries. Entries whose similarity is (numerically) 1 are left out, which
#   drops the reference item itself.
most_similar <- function(embeddings, ref_item, n_top = 10) {
  # The stored vectors are unit length, so the cosine similarity is just the
  # dot product. It is computed inline because this revision of the file
  # never defines the `cos_sim` helper it used to call.
  cos_sims <- vapply(
    as.list(embeddings),
    function(item) sum(item * ref_item),
    numeric(1)
  )

  # The dot product of a unit vector with itself is only approximately 1 in
  # floating point, so compare against 1 with a small tolerance rather than
  # exactly. Undefined similarities (NA / NaN) are dropped as well.
  tolerance <- sqrt(.Machine$double.eps)
  keep <- !is.na(cos_sims) & cos_sims < 1 - tolerance
  cos_sims <- cos_sims[keep]

  # highest similarity first; never index past the end, so the result is not
  # padded with empty entries when fewer than n_top candidates remain
  ranked <- cos_sims[order(cos_sims, decreasing = TRUE)]
  as.list(ranked[seq_len(min(n_top, length(ranked)))])
}