Last active
April 4, 2019 22:21
-
-
Save talesa/b679882ff23114939dab674f4c322b8a to your computer and use it in GitHub Desktop.
Revisions
-
talesa revised this gist
Apr 4, 2019. 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,6 +20,7 @@ def flatten(l): for row in rows: for a, b in itertools.combinations(row, 2): m[name_to_id[a], name_to_id[b]] += 1 m[name_to_id[b], name_to_id[a]] += 1 with open('pairs_output.csv', 'w', newline='') as csvfile: spamwriter = csv.writer(csvfile) -
talesa revised this gist
Apr 4, 2019. 1 changed file with 0 additions and 35 deletions. There are no files selected for viewing.
"""Expand rows of co-occurring names from a CSV file into one output row per pair.

Every row of the input file lists names that occur together.  For every
unordered pair of names sharing a row, one two-column row is written to
``pairs_output.csv``; a pair that shares several input rows is written once
per shared row.
"""

import csv
import itertools

import numpy as np

filename = 'Network-publishing-database2_short2.csv'


def flatten(l):
    """Return the items of every sublist of *l* as a single flat list."""
    return [item for sublist in l for item in sublist]


def read_rows(path):
    """Read the CSV file at *path* and return its rows with empty cells removed."""
    with open(path, newline='') as csvfile:
        return [[cell for cell in row if cell != ''] for row in csv.reader(csvfile)]


def count_pairs(rows):
    """Count how often each unordered pair of names shares a row.

    Returns ``(id_to_name, m)``.  ``id_to_name`` is the sorted list of
    distinct names in *rows*.  ``m`` is an ``N x N`` integer matrix in which
    ``m[i, j]`` with ``i >= j`` is the number of times names ``i`` and ``j``
    were paired; the strict upper triangle is left at zero.
    """
    # Sorting makes the id assignment (and therefore the output) reproducible;
    # iterating a bare set of strings varies from run to run.
    id_to_name = sorted(set(flatten(rows)))
    name_to_id = {name: idx for idx, name in enumerate(id_to_name)}
    n = len(id_to_name)
    # Plain ``int`` instead of ``np.int``: the alias was removed in NumPy 1.24.
    m = np.zeros((n, n), dtype=int)
    for row in rows:
        for a, b in itertools.combinations(row, 2):
            i, j = name_to_id[a], name_to_id[b]
            # Always tally into the lower triangle, because that is the only
            # part the writer reads.  Indexing by (i, j) in row order loses
            # every pair whose first name has the smaller id.
            m[max(i, j), min(i, j)] += 1
    return id_to_name, m


def write_pairs(path, id_to_name, m):
    """Write one ``[name_i, name_j]`` CSV row to *path* per counted pair in *m*."""
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for i, name_i in enumerate(id_to_name):
            for j in range(i + 1):
                pair = [name_i, id_to_name[j]]
                for _ in range(int(m[i, j])):
                    writer.writerow(pair)


if __name__ == '__main__':
    rows = read_rows(filename)
    id_to_name, m = count_pairs(rows)
    write_pairs('pairs_output.csv', id_to_name, m)
talesa created this gist
Apr 4, 2019. There are no files selected for viewing.
# coding: utf-8
"""Expand rows of co-occurring names from a CSV file into one output row per pair.

Every row of the input file lists names that occur together.  For every
unordered pair of names sharing a row, one two-column row is written to
``pairs_output.csv``; a pair that shares several input rows is written once
per shared row.
"""

import csv
import itertools

import numpy as np

filename = 'Network-publishing-database2_short2.csv'


def flatten(l):
    """Return the items of every sublist of *l* as a single flat list."""
    return [item for sublist in l for item in sublist]


def read_rows(path):
    """Read the CSV file at *path* and return its rows with empty cells removed."""
    with open(path, newline='') as csvfile:
        return [[cell for cell in row if cell != ''] for row in csv.reader(csvfile)]


def count_pairs(rows):
    """Count how often each unordered pair of names shares a row.

    Returns ``(id_to_name, m)``.  ``id_to_name`` is the sorted list of
    distinct names in *rows*.  ``m`` is an ``N x N`` integer matrix in which
    ``m[i, j]`` with ``i >= j`` is the number of times names ``i`` and ``j``
    were paired; the strict upper triangle is left at zero.
    """
    # Sorting makes the id assignment (and therefore the output) reproducible;
    # iterating a bare set of strings varies from run to run.
    id_to_name = sorted(set(flatten(rows)))
    name_to_id = {name: idx for idx, name in enumerate(id_to_name)}
    n = len(id_to_name)
    # Plain ``int`` instead of ``np.int``: the alias was removed in NumPy 1.24.
    m = np.zeros((n, n), dtype=int)
    for row in rows:
        for a, b in itertools.combinations(row, 2):
            i, j = name_to_id[a], name_to_id[b]
            # Always tally into the lower triangle, because that is the only
            # part the writer reads.  Indexing by (i, j) in row order loses
            # every pair whose first name has the smaller id.
            m[max(i, j), min(i, j)] += 1
    return id_to_name, m


def write_pairs(path, id_to_name, m):
    """Write one ``[name_i, name_j]`` CSV row to *path* per counted pair in *m*."""
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for i, name_i in enumerate(id_to_name):
            for j in range(i + 1):
                pair = [name_i, id_to_name[j]]
                for _ in range(int(m[i, j])):
                    writer.writerow(pair)


if __name__ == '__main__':
    rows = read_rows(filename)
    id_to_name, m = count_pairs(rows)
    write_pairs('pairs_output.csv', id_to_name, m)