inkrement · March 12, 2024 11:44 · Aug 8, 2019 · Aug 7, 2019 · Aug 7, 2019 · Aug 7, 2019
diff --git a/baseR_embeddings.R b/baseR_embeddings.R
@@ -23,14 +23,21 @@ load_embedding <- function(file_path){
     embeddings_env
 }
 
+cosine_similarity <- function(a,b){
+    # assuming unit vectors
+    # the cosine is just the dot-product
+    a %*% b
+}
+
+
 most_similar <- function(embeddings, ref_item, n_top = 10){
     # calculate cos similarity to ref_item for all elements
-    cos_sims <- eapply(embeddings, cos_sim, b = ref_item)
-
+    cos_sims <- eapply(embeddings, cosine_similarity, b = ref_item)
+    
     # only look at cos values smaller than 1
     # this will ignore the same element
     cos_sims <- cos_sims[cos_sims < 1]
-
+    
     # return top elements
     cos_sims[order(unlist(cos_sims),decreasing=TRUE)][1:n_top]
 }
diff --git a/baseR_embeddings.R b/baseR_embeddings.R
@@ -5,7 +5,7 @@ load_embedding <- function(file_path){
 
     # create new environment
     embeddings_env <- new.env(hash = TRUE, parent = emptyenv())
-    
+
     # this function is used to convert vectors to unit vectors
     # by dividing their components by vector length
     normalize_vector <- function(a){
@@ -14,10 +14,10 @@ load_embedding <- function(file_path){
 
     # iterate through the whole file line by line
     for (i in 1:length(lines)) {
-      line <- lines[[i]]
-      values <- strsplit(line, " ")[[1]]
-      label <- values[[1]]
-      embeddings_env[[label]] <- normalize_vector(as.double(values[-1]))
+        line <- lines[[i]]
+        values <- strsplit(line, " ")[[1]]
+        label <- values[[1]]
+        embeddings_env[[label]] <- normalize_vector(as.double(values[-1]))
     }
 
     embeddings_env
@@ -26,11 +26,11 @@ load_embedding <- function(file_path){
 most_similar <- function(embeddings, ref_item, n_top = 10){
     # calculate cos similarity to ref_item for all elements
     cos_sims <- eapply(embeddings, cos_sim, b = ref_item)
-    
+
     # only look at cos values smaller than 1
     # this will ignore the same element
     cos_sims <- cos_sims[cos_sims < 1]
-    
+
     # return top elements
     cos_sims[order(unlist(cos_sims),decreasing=TRUE)][1:n_top]
 }
diff --git a/baseR_embeddings.R b/baseR_embeddings.R
@@ -0,0 +1,36 @@
+load_embedding <- function(file_path){
+
+    # load full file
+    lines <- readLines(file_path)
+
+    # create new environment
+    embeddings_env <- new.env(hash = TRUE, parent = emptyenv())
+
+    # this function is used to convert vectors to unit vectors
+    # by dividing their components by vector length
+    normalize_vector <- function(a){
+        a/sqrt(sum(a**2))
+    }
+
+    # iterate through the whole file line by line
+    for (i in 1:length(lines)) {
+      line <- lines[[i]]
+      values <- strsplit(line, " ")[[1]]
+      label <- values[[1]]
+      embeddings_env[[label]] <- normalize_vector(as.double(values[-1]))
+    }
+
+    embeddings_env
+}
+
+most_similar <- function(embeddings, ref_item, n_top = 10){
+    # calculate cos similarity to ref_item for all elements
+    cos_sims <- eapply(embeddings, cos_sim, b = ref_item)
+
+    # only look at cos values smaller than 1
+    # this will ignore the same element
+    cos_sims <- cos_sims[cos_sims < 1]
+
+    # return top elements
+    cos_sims[order(unlist(cos_sims),decreasing=TRUE)][1:n_top]
+}
No results found