Skip to content

Instantly share code, notes, and snippets.

View EgleGrade's full-sized avatar

Egle Gradeckaite EgleGrade

View GitHub Profile
import re
import requests
import nltk
from lxml import html
from nltk.corpus import stopwords
# English stop words from NLTK's stopwords corpus, collected into a set.
# NOTE(review): stopwords.words() needs the corpus data to be available
# locally (e.g. via nltk.download("stopwords")) -- confirm for the target
# environment.
stop_words = {*stopwords.words("english")}

# Company name and the search terms that go with it.
company_name = "The Boring Company"
search_terms = [
    "flamethrower",  # The Boring Company sells flamethrowers
]
import re
import nltk
from nltk.corpus import stopwords
import pandas as pd
# Set of English stop words taken from NLTK's stopwords corpus.
stop_words = {*stopwords.words("english")}
def get_first_title(title):
# keep "co-founder, co-ceo, etc"
title = re.sub(r"[Cc]o[\-\ ]","", title)