Skip to content

Instantly share code, notes, and snippets.

@humanscape-david
Created September 15, 2021 01:52
Show Gist options
  • Select an option

  • Save humanscape-david/ec83791ca12ec48eb4979079a1559a03 to your computer and use it in GitHub Desktop.

Select an option

Save humanscape-david/ec83791ca12ec48eb4979079a1559a03 to your computer and use it in GitHub Desktop.
서울대학교 희귀질환센터 관련 질환 크롤러
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Disease page URLs on the Seoul National University Rare Disease Center site.
# e.g. ["https://raredisease.snuh.org/[disease-name]"]
# The list is empty here, so nothing is fetched until URLs are added.
SITE_URL_ARR = []
# Export each disease's related-disease data to an Excel file
def _extract_related_disease_text(html, site_url):
    """Return the text of the 2nd cell in the 2nd row of the page's ``table2`` table.

    Args:
        html: HTML source of one disease page.
        site_url: URL the HTML came from; used only in error messages.

    Returns:
        The cell's text, with each text fragment stripped and the fragments
        joined by single spaces.

    Raises:
        ValueError: If the page has no ``<table class="table2">`` or that
            table lacks a second row with at least two ``<td>`` cells.
    """
    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('table', class_='table2')
    if table is None:
        raise ValueError(f"no <table class='table2'> found at {site_url}")

    rows = table.find_all('tr')
    cells = rows[1].find_all('td') if len(rows) > 1 else []
    if len(cells) < 2:
        raise ValueError(
            f"table2 at {site_url} has no second row with two <td> cells"
        )

    # Store plain text, not the Tag object: a Tag is not a cell value and
    # would make the spreadsheet contents depend on the cell's child nodes.
    return cells[1].get_text(' ', strip=True)


def get_related_disease_from_snu():
    """Scrape the related-disease cell of every page in ``SITE_URL_ARR`` into ``data.xlsx``.

    Each URL is fetched with a browser-like ``User-Agent`` header, the cell
    text is extracted, and the collected texts are written as a one-column
    sheet (one row per URL, in list order) to ``data.xlsx`` in the current
    working directory.

    Returns:
        True once the Excel file has been written.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If a page does not contain the expected table cell.
    """
    related_disease_names = []
    for site_url in SITE_URL_ARR:
        response = requests.get(
            site_url,
            headers={'User-Agent': 'Mozilla/5.0'},
            # Without a timeout a stalled server would block the script forever.
            timeout=30,
        )
        # Fail on 4xx/5xx instead of parsing an error page as a disease page.
        response.raise_for_status()
        related_disease_names.append(
            _extract_related_disease_text(response.text, site_url)
        )

    df = pd.DataFrame(related_disease_names)
    df.to_excel('data.xlsx')
    return True
# Run the crawl when this file is executed. There is no __main__ guard,
# so importing the file triggers the crawl as well.
get_related_disease_from_snu()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment