#!/usr/bin/python
# Filename: rsquery2.py
"""Look up the ancestral allele of every SNP listed in ``snplist``.

Each non-blank line of ``snplist`` is taken as an rs identifier, appended to
the dbSNP query URL and fetched. The text found between the
"Ancestral Allele" and "Clinical Association" markers of the returned page is
reduced to the contents of the ``f1f1f1`` cell, and one ``(allele, rs_id)``
tuple per SNP is appended to ``ancestral0624``.
"""
import re
import sys
import time
import urllib
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

QUERY_PREFIX = "http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs="
SNP_LIST_PATH = "snplist"
OUTPUT_PATH = "ancestral0624"
REQUEST_DELAY_SECONDS = .5  # pause after every successful request
ERROR_BACKOFF_SECONDS = 5   # longer pause after a failed request


def fetch_page(url):
    """Return the body of *url* as native text.

    Raises:
        IOError: if the request fails (propagated from ``urlopen``).
    """
    # closing() releases the connection even if read() raises.
    with closing(urlopen(url)) as response:
        page = response.read()
    # Python 3 hands back bytes; Python 2 already returns ``str``.
    if not isinstance(page, str):
        page = page.decode("utf-8", "replace")
    return page


def extract_ancestral_allele(page):
    """Return the ancestral-allele cell text scraped from a dbSNP *page*.

    The page is narrowed to the part after the last "Ancestral Allele" marker
    and before the following "Clinical Association" marker; within that, the
    text after the last ``f1f1f1">`` attribute up to the next ``<`` is
    returned. If a marker is absent the corresponding narrowing step is a
    no-op, so the result may be unrelated text (or ``""`` for an empty page).
    """
    section = page.split("Ancestral Allele")[-1]
    section = section.split("Clinical Association")[0]
    cell = section.split('f1f1f1">')[-1]
    # NOTE(review): the original end delimiter was an empty string, which
    # makes str.split raise ValueError. It was presumably an HTML closing tag
    # lost in transit; cutting at the next "<" stops at whatever tag follows
    # the value -- confirm against a live page.
    return cell.split("<")[0]


def format_record(allele, rs_id):
    """Return the output line for one SNP: the tuple repr plus a newline."""
    return "%s\n" % str((allele, rs_id))


def main():
    """Query every rs ID in ``snplist`` and append results to the output."""
    # The output is opened first, in append mode, as in the original script.
    with open(OUTPUT_PATH, "a") as out, open(SNP_LIST_PATH) as snp_list:
        for line in snp_list:
            # Drop the trailing newline so it does not end up in the URL.
            rs_id = line.strip()
            if not rs_id:
                continue
            try:
                page = fetch_page(QUERY_PREFIX + rs_id)
            except IOError as err:
                # Best effort: report the skipped ID, back off, move on.
                sys.stderr.write("skipping %s: %s\n" % (rs_id, err))
                time.sleep(ERROR_BACKOFF_SECONDS)
                continue
            time.sleep(REQUEST_DELAY_SECONDS)
            out.write(format_record(extract_ancestral_allele(page), rs_id))


if __name__ == "__main__":
    main()