""" This script will search 4chan (need to specify a board unfortunately) and then search all comments on Reddit for specific keywords. If found, it will then send an email with the links to any matching posts. Because 4chan posts are archived after ~48 hours I would recommend setting this up to run on that cadence as well. Sender email will need "Allow less secure apps" to ON or similar setting. I followed these instructions for setting that up: https://realpython.com/python-send-email/#option-1-setting-up-a-gmail-account-for-development 4chan search modified from https://gist.github.com/Taiiwo/7323144 Needs python3 Usage: python3 bad_place_notify.py 1/2 1: all: Search every reply to every thread on the board 2: onlyop: Search only the OP. Example: python3 bad_place_notify.py all """ import json, time, sys, smtplib, ssl from urllib.request import urlopen # Searches a specific 4chan board for key words. # Things are archived on 4chan after ~48 hours and they will no longer be searchable. # API Docs: https://github.com/4chan/4chan-API def search_4chan_replies(*args): # Search text in the replies of threads # All the search terms you want to look for. strings = ["TODO_1", "TODO_2", "TODO_ETC"] # The board you want to search board = "g" rawjson = urlopen('https://a.4cdn.org/' + board + '/catalog.json').read() # Getting around API limits time.sleep(1) parsedjson = json.loads(rawjson) count = 0 pagecount = 0 retme = [] for page in parsedjson: print('Searching 4chan page ' + str(count)) count += 1 threadcount = 0 for thread in page['threads']: if 'args' in locals(): if args[0] != 'onlyop': threadcount += 1 # Get thread number num = thread['no'] try: rawreplies = urlopen('https://a.4cdn.org/' + board + '/res/' + str(num) + '.json').read() except: print("Thread 404'd") break time.sleep(0.0001) parsedreplies = json.loads(rawreplies) for post in parsedreplies['posts']: if 'com' in post and any(x in post['com'] for x in strings): # (Thinking of checking post['name'] if num == post['no']: retme.append('http://boards.4chan.org/' + board + '/res/' + str(num)) else: retme.append('http://boards.4chan.org/' + board + '/res/' + str(num) + '#p' + str(post['no'])) else: pagecount += 1 if 'com' in thread: if any(x in thread['com'] for x in strings): retme.append('http://boards.4chan.org/' + board + '/res/' + str(thread['no'])) return retme # Searches all reddit comments for search terms # API Docs: https://github.com/pushshift/api def search_reddit_replies(): # The search terms we will look for in the last 3 days. Not case sensitive. strings = ["TODO_1", "TODO_2", "TODO_ETC"] results = [] for string in strings: rawjson = urlopen('https://api.pushshift.io/reddit/search/comment/?q=' + string + '&after=3d').read() parsedjson = json.loads(rawjson) print("Searching Reddit for " + string + "...") for data in parsedjson['data']: results.append("https://reddit.com" + data['permalink']) return results def emailMe(results): print('Emailing ' + results) smtp_server = "smtp.gmail.com" port = 587 # For starttls sender_email = "TODO_sender_email" receiver_email = "TODO_receiver_email" message = """\ Subject: You've Been Mentioned On A Bad Place :( """ password = 'TODO_sender_email_password' # Create a secure SSL context context = ssl.create_default_context() # Try to log in to server and send email try: server = smtplib.SMTP(smtp_server,port) server.ehlo() # Can be omitted server.starttls(context=context) # Secure the connection server.ehlo() # Can be omitted server.login(sender_email, password) server.sendmail(sender_email, receiver_email, message + results) print("Email sent!") except Exception as e: # Print any error messages to stdout print(e) finally: server.quit() def main(): results = search_4chan_replies(sys.argv[1]) + search_reddit_replies() if results: emailMe('\n'.join(results)) if __name__ == "__main__": main()