Created
September 15, 2021 01:52
-
-
Save humanscape-david/ec83791ca12ec48eb4979079a1559a03 to your computer and use it in GitHub Desktop.
서울대학교 희귀질환센터 관련 질환 크롤러
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| # URLs of the disease pages on the Seoul National University Rare Disease Center site | |
| # e.g. ["https://raredisease.snuh.org/[disease name]"] | |
| SITE_URL_ARR = [] | |
# Export the related-disease data of each disease page to an Excel file.
def get_related_disease_from_snu(site_urls=None, output_path='data.xlsx', timeout=10):
    """Scrape the related-disease cell from each page and save the results to Excel.

    For every URL, the page is downloaded and the second cell of the second
    row of the ``<table class="table2">`` element is read. The text of those
    cells is collected (one row per URL, in input order) and written to an
    Excel workbook.

    Args:
        site_urls: Iterable of page URLs to crawl. Defaults to the
            module-level ``SITE_URL_ARR`` when omitted.
        output_path: Destination of the Excel file. Defaults to
            ``'data.xlsx'``, the path the function has always written to.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        ``True`` once the Excel file has been written.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If a page does not contain the expected table layout.
    """
    if site_urls is None:
        site_urls = SITE_URL_ARR

    related_disease_names = []
    for site_url in site_urls:
        response = requests.get(
            site_url,
            headers={'User-Agent': 'Mozilla/5.0'},
            # Without a timeout a stalled server would block the crawl forever.
            timeout=timeout,
        )
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')

        table = soup.find('table', class_='table2')
        if table is None:
            raise ValueError(f"No <table class='table2'> found at {site_url}")

        rows = table.find_all('tr')
        cells = rows[1].find_all('td') if len(rows) > 1 else []
        if len(cells) < 2:
            raise ValueError(
                f"Unexpected table layout at {site_url}: "
                "second row / second cell is missing"
            )

        # Store the cell's text, not the bs4 Tag object, so the spreadsheet
        # contains plain strings rather than parser nodes.
        related_disease_names.append(cells[1].get_text(' ', strip=True))

    df = pd.DataFrame(related_disease_names)
    df.to_excel(output_path)
    return True
# Top-level call: the crawl runs whenever this module is loaded.
| get_related_disease_from_snu() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment