search.pyhttps://www.crummy.com/software/BeautifulSoup/bs4/doc/ #------------------------- # css selector #------------------------- css_soup.select("p.strikeout.body") soup.select("p nth-of-type(3)") # 3rd child soup.select("head > title") soup.select("p > a:nth-of-type(2)") soup.select("p > #link1") # direct child soup.select("#link1 ~ .sister") # sibling soup.select('a[href]') # existence of an attribute soup.select_one(".sister") # attribute value soup.select('a[href="http://example.com/elsie"]') # exact attribute soup.select('a[href^="http://example.com/"]') # negative match soup.select('a[href$="tillie"]') # end match soup.select('a[href*=".com/el"]') # middle match #------------------------- # basic #------------------------- soup.find_all('b') # match by tag soup.find_all(re.compile("^b")) # match by tag using regex soup.find_all(["a", "b"]) # match by tag in list # function (complex condition) def has_class_but_no_id(tag): return tag.has_attr('class') and not tag.has_attr('id') soup.find_all(has_class_but_no_id) #------------------------- # find_all_api #------------------------- find_all(name, attrs, recursive, string, limit, **kwargs) soup.find_all("title") # tag condition soup.find_all("p", "title") # tag and attr # [

The Dormouse's story

] soup.find_all("a") # keyword arguments soup.find_all(id="link2") soup.find_all(href=re.compile("elsie"), id='link1') soup.find(string=re.compile("sisters")) # text contain sisters # css class (class is researved keyword) soup.find_all("a", class_="sister")