# Fetch a page, strip navigation/form clutter from its HTML, and print the
# cleaned markup.  Relies on `urllib2` and `BeautifulSoup` being imported
# elsewhere in the file.

# Location handed to urllib2.urlopen(), so it has to be something urlopen
# accepts (a URL string or Request object).
# TODO: fill in -- the value was left blank in the original script.
outfilename = ''

# Number of files downloaded from site.
# TODO: fill in -- the value was left blank in the original script.  While it
# stays at 0 the loop below does not execute at all.
number_of_pages = 0

# Keyword arguments for successive soup.findAll() calls; every element
# matched by any of them is detached from the tree.  Order matches the
# original sequence of removals.
UNWANTED_SELECTORS = (
    {'attrs': {'class': 'link'}},
    {'attrs': {'class': 'thead'}},
    {'name': 'hr'},
    {'name': 'form'},
    {'name': 'head'},
    {'name': 'input'},
)

# NOTE(review): `i` is never used, so every pass fetches the very same
# `outfilename`; presumably a per-page location was meant to be derived from
# `i` -- confirm.  Also, range(1, number_of_pages) runs number_of_pages - 1
# times; verify that this is the intended count.
for i in range(1, number_of_pages):
    response = urllib2.urlopen(outfilename)
    try:
        soup = BeautifulSoup(response)
    finally:
        # The original never closed the handle returned by urlopen().
        response.close()

    # Round-trip through prettify() and parse again, as the original did, so
    # that the final output stays the same as before.
    data = soup.prettify()
    soup = BeautifulSoup(data)

    for selector in UNWANTED_SELECTORS:
        for element in soup.findAll(**selector):
            element.extract()

    # Parenthesised single-argument form prints identically under the
    # Python 2 `print` statement.
    print(soup.prettify())