"""Open the old exams referenced by a study-guide PDF in the web browser.

The script downloads a PDF from a user-supplied URL, scans its text for
exam references such as ``S19E1#7`` (term letter, two-digit year, exam
code, problem number) and opens the matching exam and answer-key PDFs from
the math.purdue.edu old-exam archive.  A worked-solution PDF is opened as
well whenever the server does not answer 404 for it.
"""

import io
import re
import webbrowser

import PyPDF2
import requests

# Every archive file is served through this script; the file name is appended.
ARCHIVE_URL = (
    "https://www.math.purdue.edu/php-scripts/courses/oldexams/"
    "serve_file.php?file="
)

# Seconds to wait on any single HTTP request before giving up, so an
# unresponsive server cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Groups: 1 = term letter, 2 = two-digit year, 3 = exam code.  The trailing
# "#<problem number>" must be present but is not needed to locate the file.
EXAM_REFERENCE = re.compile(r"(S|F)(\d\d)(E\d|F\d|FE)#\d\d?")


def _download_pdf_text(url):
    """Download the PDF at *url* and return its text, one page per chunk.

    Each page's text is preceded by a newline so a reference at the end of
    one page cannot run into the start of the next.

    Raises:
        requests.RequestException: the download failed, timed out, or the
            server answered with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly here rather than handing an HTML error page to the parser.
    response.raise_for_status()
    with io.BytesIO(response.content) as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` guards against a page that yields no text at all.
        return "".join(
            "\n" + (page.extract_text() or "") for page in reader.pages
        )


def _referenced_exams(text):
    """Return the distinct (term, year, exam) triples referenced in *text*.

    Several problems usually come from the same exam; de-duplicating (while
    keeping first-seen order) avoids opening the same PDFs once per problem.
    """
    return list(
        dict.fromkeys(
            match.group(1, 2, 3) for match in EXAM_REFERENCE.finditer(text)
        )
    )


def _exam_url(term, year, exam, prefix=""):
    """Build the archive URL for one exam file.

    *prefix* selects the variant: ``""`` for the exam itself, ``"Ans-"`` for
    the answer key, ``"Sol-"`` for the worked solutions.
    """
    return f"{ARCHIVE_URL}{prefix}26100{exam}-{term}20{year}.pdf"


def _is_available(url):
    """Return True unless the server answers 404 for *url*.

    The response body is streamed and never read, so checking for a file
    does not download it.  A network failure counts as "not available" so
    one flaky check does not abort the remaining exams.
    """
    try:
        with requests.get(
            url, stream=True, timeout=REQUEST_TIMEOUT
        ) as response:
            return response.status_code != 404
    except requests.RequestException:
        return False


def main():
    """Prompt for a study-guide URL and open every exam it references."""
    url = input("Enter the study guide URL: ")
    text = _download_pdf_text(url)
    for term, year, exam in _referenced_exams(text):
        webbrowser.open(_exam_url(term, year, exam))
        webbrowser.open(_exam_url(term, year, exam, "Ans-"))
        # Worked solutions do not exist for every exam; only open real ones.
        solution_url = _exam_url(term, year, exam, "Sol-")
        if _is_available(solution_url):
            webbrowser.open(solution_url)


if __name__ == "__main__":
    main()