Skip to content

Instantly share code, notes, and snippets.

@talesa
Last active April 4, 2019 22:21
Show Gist options
  • Select an option

  • Save talesa/b679882ff23114939dab674f4c322b8a to your computer and use it in GitHub Desktop.

Select an option

Save talesa/b679882ff23114939dab674f4c322b8a to your computer and use it in GitHub Desktop.

Revisions

  1. talesa revised this gist Apr 4, 2019. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions author_list_to_pairs.py
    Original file line number Diff line number Diff line change
    @@ -20,6 +20,7 @@ def flatten(l):
    # Count every unordered pair of names that appears together in a row.
    # Both m[a, b] and m[b, a] are incremented, which keeps m symmetric.
    for row in rows:
    for a, b in itertools.combinations(row, 2):
    m[name_to_id[a], name_to_id[b]] += 1
    m[name_to_id[b], name_to_id[a]] += 1

    with open('pairs_output.csv', 'w', newline='') as csvfile:
    spamwriter = csv.writer(csvfile)
  2. talesa revised this gist Apr 4, 2019. 1 changed file with 0 additions and 35 deletions.
    35 changes: 0 additions & 35 deletions author_list_to_pairs.py
    Original file line number Diff line number Diff line change
    @@ -1,64 +1,29 @@

    # coding: utf-8

    # In[8]:


    # Path of the input CSV file that the script opens and parses with csv.reader.
    filename = 'Network-publishing-database2_short2.csv'


    # In[9]:


    import csv
    import itertools
    import numpy as np


    # In[10]:


    with open(filename, newline='') as csvfile:
        # The reader is lazy, so the rows are materialised while the file is
        # still open; empty cells are dropped from every record.
        rows = [
            [cell for cell in record if cell != '']
            for record in csv.reader(csvfile)
        ]


    # In[11]:


    def flatten(l):
        """Return one flat list holding the items of every sub-iterable in ``l``.

        Only a single level of nesting is removed; the items keep their
        original order.
        """
        return list(itertools.chain.from_iterable(l))


    # In[12]:


    # Assign every distinct name an integer id. The names are sorted so the
    # ids (and therefore the order of the output rows) are identical on every
    # run; iterating a bare set of strings gives an order that can change
    # between interpreter runs because of string hash randomisation.
    id_to_name = sorted(set(flatten(rows)))
    name_to_id = {name: index for index, name in enumerate(id_to_name)}


    # In[13]:


    # Number of distinct names, i.e. the side length of the pair-count matrix.
    N = len(id_to_name)


    # In[14]:


    # m[i, j] counts how many rows contain both name i and name j.
    # Counts are accumulated in the lower triangle only (j <= i): that is the
    # half of the matrix the output loop below iterates over, so writing to
    # m[id(a), id(b)] unconditionally would silently lose every pair whose
    # first id is the smaller one.
    # The builtin int is used as dtype because the np.int alias was removed
    # in NumPy 1.24.
    m = np.zeros((N, N), dtype=int)
    for row in rows:
        for a, b in itertools.combinations(row, 2):
            i, j = name_to_id[a], name_to_id[b]
            m[max(i, j), min(i, j)] += 1


    # In[15]:


    with open('pairs_output.csv', 'w', newline='') as csvfile:
        pair_writer = csv.writer(csvfile)
        # Walk the lower triangle of m (diagonal included).
        for i in range(N):
            first = id_to_name[i]
            for j in range(i + 1):
                pair = [first, id_to_name[j]]
                # Emit the pair once per counted co-occurrence.
                pair_writer.writerows(pair for _ in range(m[i, j]))

  3. talesa created this gist Apr 4, 2019.
    64 changes: 64 additions & 0 deletions author_list_to_pairs.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,64 @@

    # coding: utf-8

    # In[8]:


    # Path of the input CSV file that the script opens and parses with csv.reader.
    filename = 'Network-publishing-database2_short2.csv'


    # In[9]:


    import csv
    import itertools
    import numpy as np


    # In[10]:


    with open(filename, newline='') as csvfile:
        # The reader is lazy, so the rows are materialised while the file is
        # still open; empty cells are dropped from every record.
        rows = [
            [cell for cell in record if cell != '']
            for record in csv.reader(csvfile)
        ]


    # In[11]:


    def flatten(l):
        """Return one flat list holding the items of every sub-iterable in ``l``.

        Only a single level of nesting is removed; the items keep their
        original order.
        """
        return list(itertools.chain.from_iterable(l))


    # In[12]:


    # Assign every distinct name an integer id. The names are sorted so the
    # ids (and therefore the order of the output rows) are identical on every
    # run; iterating a bare set of strings gives an order that can change
    # between interpreter runs because of string hash randomisation.
    id_to_name = sorted(set(flatten(rows)))
    name_to_id = {name: index for index, name in enumerate(id_to_name)}


    # In[13]:


    # Number of distinct names, i.e. the side length of the pair-count matrix.
    N = len(id_to_name)


    # In[14]:


    # m[i, j] counts how many rows contain both name i and name j.
    # Counts are accumulated in the lower triangle only (j <= i): that is the
    # half of the matrix the output loop below iterates over, so writing to
    # m[id(a), id(b)] unconditionally would silently lose every pair whose
    # first id is the smaller one.
    # The builtin int is used as dtype because the np.int alias was removed
    # in NumPy 1.24.
    m = np.zeros((N, N), dtype=int)
    for row in rows:
        for a, b in itertools.combinations(row, 2):
            i, j = name_to_id[a], name_to_id[b]
            m[max(i, j), min(i, j)] += 1


    # In[15]:


    with open('pairs_output.csv', 'w', newline='') as csvfile:
        pair_writer = csv.writer(csvfile)
        # Walk the lower triangle of m (diagonal included).
        for i in range(N):
            first = id_to_name[i]
            for j in range(i + 1):
                pair = [first, id_to_name[j]]
                # Emit the pair once per counted co-occurrence.
                pair_writer.writerows(pair for _ in range(m[i, j]))