Skip to content

Instantly share code, notes, and snippets.

@2minchul
Created March 17, 2023 16:00
Show Gist options
  • Select an option

  • Save 2minchul/6ebd790f20cca7b48d1c3d3b71ac051d to your computer and use it in GitHub Desktop.

Select an option

Save 2minchul/6ebd790f20cca7b48d1c3d3b71ac051d to your computer and use it in GitHub Desktop.

Revisions

  1. 2minchul created this gist Mar 17, 2023.
    210 changes: 210 additions & 0 deletions example.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,210 @@
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from typing import Optional, List, Tuple, Dict

    import requests
    from bs4 import BeautifulSoup


    def search_many_postcode(queries: List[Tuple[str, str]]) -> Dict[tuple, Optional[dict]]:
        """
        Resolve several (keyword, zipcode) pairs concurrently.

        Each pair is submitted to `search_postcode` on a pool of at most 10
        worker threads; all workers are handed the same `requests.Session`.
        An exception raised by any lookup propagates out of this function
        when that lookup's result is collected.

        :param queries: list of tuple(keyword, zipcode)
        :return: {tuple(keyword, zipcode): dict of address}; the value is
            whatever `search_postcode` returned for that pair (None when no
            entry matched the zipcode)
        """
        with ThreadPoolExecutor(max_workers=10) as executor, requests.Session() as http:
            # Map every submitted future back to the query that produced it.
            pending = {}
            for query in queries:
                job = executor.submit(search_postcode, query[0], query[1], http)
                pending[job] = query
            # Collect results in completion order, keyed by the original pair.
            return {pending[done]: done.result() for done in as_completed(pending)}


    def search_postcode(keyword: str, zipcode: str, session: Optional[requests.Session] = None) -> Optional[dict]:
        """
        Search for `keyword` and return the first result whose zone code equals `zipcode`.

        :param keyword: address text passed to `request_search_postcode`
        :param zipcode: value compared against each result's 'zonecode' entry
        :param session: optional session forwarded to `request_search_postcode`
        :return: the first matching address dict, or None when nothing matches
        """
        page = request_search_postcode(keyword, session)
        candidates = parse_iter_postcode(page, keyword)
        return next((entry for entry in candidates if entry.get('zonecode') == zipcode), None)


    def request_search_postcode(keyword: str, session: Optional[requests.Session] = None,
                                timeout: Optional[float] = 10.0) -> str:
        """
        Fetch the postcode.map.daum.net search result page for `keyword`.

        :param keyword: address text to search for; it is percent-encoded
            before being placed in the query string
        :param session: optional `requests.Session` to send the request
            through; when omitted (or falsy) the module-level `requests.get`
            is used
        :param timeout: value passed to requests as `timeout` (seconds);
            pass None to wait without limit
        :return: the response body as text
        :raises requests.HTTPError: when `raise_for_status()` rejects the
            response status
        """
        # Function-scope import: stdlib only, keeps this block self-contained.
        from urllib.parse import quote

        # Request headers copied from a desktop Chrome 111 browser session.
        headers = {
            'authority': 'postcode.map.daum.net',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
            'referer': 'https://postcode.map.daum.net',
            'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'iframe',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-user': '?1',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
        }
        # The keyword is free text: encode it so characters such as '&', '#'
        # or '=' cannot cut the value short or inject extra query parameters.
        encoded = quote(keyword, safe='')
        url = 'https://postcode.map.daum.net/search?' \
              f'region_name={encoded}&cq={encoded}&cpage=1&origin=https://postcode.map.daum.net&' \
              f'isp=N&isgr=N&isgj=N&ongr=&ongj=&regionid=&regionname=&roadcode=&roadname=&banner=on&' \
              f'ubl=on&indaum=off&vt=popup&amr=on&amj=on&ani=off&mode=transmit&sd=on&fi=on&fc=on&hmb=off&' \
              f'heb=off&asea=off&smh=off&zo=on&theme=&bit=&sit=&sgit=&sbit=&pit=&mit=&lcit=&plrg=&plrgt=1.5&' \
              f'us=on&msi=10&ahs=off&whas=500&zn=Y&sm=on&CWinWidth=500&sptype=&sporgq=&a51=off'
        # Both a Session and the requests module expose the same get() call.
        client = session or requests
        # Without a timeout a stalled connection would block the calling
        # thread indefinitely.
        with client.get(url, headers=headers, timeout=timeout) as response:
            response.raise_for_status()
            return response.text


    def _get_data_attrs(tag):
        """
        Collect the `data-*` attributes of a parsed tag.

        :param tag: object exposing an `attrs` mapping (the callers pass the
            result of a BeautifulSoup lookup), or None when the lookup found
            nothing
        :return: dict mapping each attribute name, with its leading 'data-'
            removed, to the attribute value; empty when `tag` is None
        """
        # Lookups such as find() yield None on a miss; treat that as "no attributes"
        # instead of failing with AttributeError on `.attrs`.
        if tag is None:
            return {}
        prefix = 'data-'
        return {name[len(prefix):]: value for name, value in tag.attrs.items() if name.startswith(prefix)}


    def _find_address_attrs(address_dl, main_class, rel_class):
        """Return the data-* attributes of the 'txt_address' span under the <dd> with `main_class` (else `rel_class`); {} when absent."""
        dd = address_dl.find('dd', attrs={'class': main_class}) or address_dl.find('dd', attrs={'class': rel_class})
        if not dd:
            return {}
        span = dd.find('span', attrs={'class': 'txt_address'})
        # find() returns None when the <dd> carries no such span; reading its
        # attributes would raise AttributeError, so fall back to an empty dict.
        if span is None:
            return {}
        return _get_data_attrs(span)


    def parse_iter_postcode(html, keyword=''):
        """
        Yield one address dict per result entry found in a search result page.

        Entries are the <li> elements inside the <ul class="list_post">; an
        entry without a <dl class="list_address"> is skipped. Nothing is
        yielded when the list itself is missing.

        :param html: markup of the search result page
        :param keyword: stored unchanged under the 'query' key of every result
        :return: generator of dicts built from the entry's data-* attributes,
            with road / jibun specific values taken from the nested address
            spans when present
        """
        soup = BeautifulSoup(html, 'html.parser')
        ul = soup.find('ul', attrs={'class': 'list_post'})
        if not ul:
            return
        for li_tag in ul.find_all('li'):
            searched = _get_data_attrs(li_tag)
            address_dl = li_tag.find('dl', attrs={'class': 'list_address'})
            if not address_dl:
                continue
            road = _find_address_attrs(address_dl, 'main_road', 'rel_road')
            jibun = _find_address_attrs(address_dl, 'main_jibun', 'rel_jibun')

            data = {
                'query': keyword,
                '_from': 'html',
                'addressType': searched.get('addr_type', ''),
                'userSelectedType': searched.get('addr_type', ''),
                'address': searched.get('addr', ''),
                'addressEnglish': searched.get('addr_eng', ''),
                'bcode': searched.get('bcode', ''),
                'bname': searched.get('bname', ''),
                'bnameEnglish': searched.get('bname_eng', ''),
                'bname1': searched.get('bname1', ''),
                'bname1English': searched.get('bname1_eng', ''),
                'bname2': searched.get('bname2', ''),
                'bname2English': searched.get('bname2_eng', ''),
                # Building / road fields: prefer the entry's own value, then
                # fall back to the road span and (for buildings) the jibun span.
                'buildingCode': searched.get('building_code', '') or road.get('building_code', '') or jibun.get(
                    'building_code', ''),
                'buildingName': searched.get('building_name', '') or road.get('building_name', '') or jibun.get(
                    'building_name', ''),
                'hname': searched.get('hname', ''),
                'apartment': searched.get('is_multi_building', '') == 'true',
                'roadname': searched.get('roadname', '') or road.get('roadname', ''),
                'roadnameCode': searched.get('roadname_code', '') or road.get('roadname_code', ''),
                'roadnameEnglish': searched.get('roadname_eng', '') or road.get('roadname_eng', ''),
                'sido': searched.get('sido', ''),
                'sidoEnglish': searched.get('sido_eng', ''),
                'sigungu': searched.get('sigungu', ''),
                'sigunguCode': searched.get('sigungu_code', ''),
                'sigunguEnglish': searched.get('sigungu_eng', ''),
                'zonecode': searched.get('zonecode', ''),

                'jibunAddress': jibun.get('addr', ''),
                'jibunAddressEnglish': jibun.get('addr_eng', ''),

                'roadAddress': road.get('addr', ''),
                'roadAddressEnglish': road.get('addr_eng', ''),
            }

            yield data


    if __name__ == '__main__':
        from pprint import pprint

        # Demo run: two addresses paired with the zip codes shown in the
        # sample output below, plus one keyword that yields no match (None).
        sample_queries = [
            ('서울 강남구 가로수길 5', '06035'),
            ('서울 동작구 동작동 316', '06905'),
            ('없는주소', '12345'),
        ]
        pprint(search_many_postcode(sample_queries))
        """output
    {('서울 강남구 가로수길 5', '06035'): {'_from': 'html',
    'address': '서울 강남구 가로수길 5',
    'addressEnglish': '5, Garosu-gil, Gangnam-gu, '
    'Seoul, Korea',
    'addressType': 'R',
    'apartment': False,
    'bcode': '1168010700',
    'bname': '신사동',
    'bname1': '',
    'bname1English': '',
    'bname2': '신사동',
    'bname2English': 'Sinsa-dong',
    'bnameEnglish': 'Sinsa-dong',
    'buildingCode': '1168010700105370005011918',
    'buildingName': '',
    'hname': '',
    'jibunAddress': '서울 강남구 신사동 537-5',
    'jibunAddressEnglish': '537-5, Sinsa-dong, '
    'Gangnam-gu, Seoul, Korea',
    'query': '서울 강남구 가로수길 5',
    'roadAddress': '서울 강남구 가로수길 5',
    'roadAddressEnglish': '5, Garosu-gil, '
    'Gangnam-gu, Seoul, Korea',
    'roadname': '가로수길',
    'roadnameCode': '4858362',
    'roadnameEnglish': 'Garosu-gil',
    'sido': '서울',
    'sidoEnglish': 'Seoul',
    'sigungu': '강남구',
    'sigunguCode': '11680',
    'sigunguEnglish': 'Gangnam-gu',
    'userSelectedType': 'R',
    'zonecode': '06035'},
    ('서울 동작구 동작동 316', '06905'): {'_from': 'html',
    'address': '서울 동작구 동작동 316',
    'addressEnglish': '316, Dongjak-dong, '
    'Dongjak-gu, Seoul, Korea',
    'addressType': 'J',
    'apartment': False,
    'bcode': '1159010600',
    'bname': '동작동',
    'bname1': '',
    'bname1English': '',
    'bname2': '동작동',
    'bname2English': 'Dongjak-dong',
    'bnameEnglish': 'Dongjak-dong',
    'buildingCode': '1159010600103160000000001',
    'buildingName': '반포수난구조대',
    'hname': '사당2동',
    'jibunAddress': '서울 동작구 동작동 316',
    'jibunAddressEnglish': '316, Dongjak-dong, '
    'Dongjak-gu, Seoul, '
    'Korea',
    'query': '서울 동작구 동작동 316',
    'roadAddress': '서울 동작구 동작대로 335-1',
    'roadAddressEnglish': '335-1, Dongjak-daero, '
    'Dongjak-gu, Seoul, Korea',
    'roadname': '동작대로',
    'roadnameCode': '2005009',
    'roadnameEnglish': 'Dongjak-daero',
    'sido': '서울',
    'sidoEnglish': 'Seoul',
    'sigungu': '동작구',
    'sigunguCode': '11590',
    'sigunguEnglish': 'Dongjak-gu',
    'userSelectedType': 'J',
    'zonecode': '06905'},
    ('없는주소', '12345'): None}
    """