|
from scholarly import scholarly, ProxyGenerator |
|
import os |
|
import json |
|
import pandas as pd |
|
|
|
# Route all of scholarly's HTTP traffic through a local proxy
# (e.g. a client listening on localhost:7890) so Google Scholar
# requests are less likely to be rate-limited/blocked.
proxy_generator = ProxyGenerator()
proxy_generator.SingleProxy(http='http://localhost:7890')
scholarly.use_proxy(proxy_generator)
|
|
|
# Lowercase title keywords that mark a publication as related to
# federated / privacy-preserving learning; matched case-insensitively
# against publication titles further down.
keywords = ['federated', 'distributed', 'privacy', 'private', 'differential']
|
|
|
count = 0
maxcount = 200
authors = []
field_keyword = 'federated_learning'
# Google Scholar author search supports filtering by profile label.
search_query = scholarly.search_author('label:' + field_keyword)

# Collect up to `maxcount` author profiles carrying the label, and for
# each author count how many publications (and their citations) match
# one of the related keywords above.
while count < maxcount:
    print('searching:', count)
    try:
        # fill() downloads the complete profile, including publications.
        author = scholarly.fill(next(search_query))
    except StopIteration:
        # Fewer than `maxcount` labelled authors exist: stop gracefully
        # instead of crashing on the exhausted result generator.
        break

    name = author['name']
    citedby = author['citedby']
    hindex = author['hindex']
    i10index = author['i10index']
    interests = author['interests']
    affiliation = author['affiliation']
    publications = author['publications']

    print('name:', name)
    print('aff:', affiliation)
    print('citedby:', citedby)
    print('hindex:', hindex)
    print('i10index:', i10index)
    print('interests:', interests)
    print('total:', len(publications))

    # Number of keyword-matching publications and their summed citations.
    flcount = 0
    flcites = 0
    for pub in publications:
        pubtitle = pub['bib']['title'].lower()
        # Count each publication at most once, even when its title
        # matches several keywords (same as the original break-on-match).
        if any(keyword.lower() in pubtitle for keyword in keywords):
            flcount += 1
            flcites += pub['num_citations']

    print('flcount:', flcount)
    print('flcites:', flcites)
    print()

    authors.append({
        'name': name,
        'aff': affiliation,
        'citedby': citedby,
        'hindex': hindex,
        'i10index': i10index,
        'totalpub': len(publications),
        'flpub': flcount,
        'flcitedby': flcites,
        'interests': ', '.join(interests),
    })

    count += 1

print('total', len(authors))
# Guard: the search may legitimately yield zero authors; indexing an
# empty list would raise IndexError.
if authors:
    print(authors[0])
|
|
|
# Persist the collected per-author stats to a spreadsheet
# (pandas' Excel writer requires an engine such as openpyxl).
pd.DataFrame(authors).to_excel('fl authors.xlsx')
# scholarly documentation: https://scholarly.readthedocs.io/