This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import requests
import nltk
from lxml import html
from nltk.corpus import stopwords

# English stop words from NLTK's "stopwords" corpus, held in a set.
# Requires the corpus to be available locally (e.g. via
# nltk.download("stopwords")).
stop_words = set(stopwords.words("english"))

# A company name
company_name = "The Boring Company"

# The Boring Company sells flamethrowers.
# NOTE(review): presumably used together with company_name when searching;
# the consuming code is not visible in this excerpt -- confirm.
search_terms = ["flamethrower"]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import nltk
from nltk.corpus import stopwords
import pandas as pd

# English stop words from NLTK's "stopwords" corpus, held in a set.
# Requires the corpus to be available locally (e.g. via
# nltk.download("stopwords")).
stop_words = set(stopwords.words("english"))
| def get_first_title(title): | |
| # keep "co-founder, co-ceo, etc" | |
| title = re.sub(r"[Cc]o[\-\ ]","", title) |