Created
July 3, 2024 14:34
-
-
Save kristiyanto/b48ca3bb57a9ec701a23cf735def7748 to your computer and use it in GitHub Desktop.
NLP: Text Summarization for Medium Article
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Refer to the Jupyter Notebook and article for package imports and the complete code. | |
def summarize(text, char_limit=80):
    """Build a short extractive summary of a row's description.

    The description is split into sentences with the module-level ``nlp``
    pipeline. Each sentence is scored by the number of case-insensitive
    substring occurrences of the row's keywords, and the highest-scoring
    sentences are joined with single spaces for as long as they fit.

    Args:
        text: Row-like object exposing ``description`` and ``keywords``
            attributes (for example a row passed by ``apply(..., axis=1)``).
            ``keywords`` is presumed to be an iterable of strings --
            TODO confirm against the caller.
        char_limit: Maximum length of the returned summary. The top-ranked
            sentence is always kept, even when it alone exceeds the limit,
            so that a non-empty description never yields an empty summary.

    Returns:
        The summary string, or ``""`` when there are no keywords or the
        description contains no sentences.
    """
    keywords = text.keywords
    if not keywords:
        # Nothing to rank by; skip the (comparatively expensive) parse.
        return ""

    doc = nlp(text.description)
    stripped = (sent.text.strip() for sent in doc.sents)
    sentences = [sentence for sentence in stripped if sentence]
    if not sentences:
        return ""

    # Lower-case the keywords once instead of once per sentence. Empty
    # keywords are dropped: "".count("") == len + 1 would skew the scores.
    lowered_keywords = [keyword.lower() for keyword in keywords if keyword]

    def keyword_hits(sentence):
        lowered = sentence.lower()
        return sum(lowered.count(keyword) for keyword in lowered_keywords)

    # Sort on the score only: sorted() is stable (also with reverse=True),
    # so sentences with equal scores keep their order of appearance instead
    # of being ordered by their text.
    ranked = sorted(sentences, key=keyword_hits, reverse=True)

    chosen = [ranked[0]]
    length = len(ranked[0])
    for sentence in ranked[1:]:
        # Length of the joined summary if this sentence were appended:
        # one separating space plus the sentence itself.
        candidate_length = length + 1 + len(sentence)
        if candidate_length > char_limit:
            break
        chosen.append(sentence)
        length = candidate_length

    return " ".join(chosen)
# Run summarize on every row (axis=1 hands each row to the function) and
# store the resulting strings in a new 'summary' column.
data["summary"] = data.apply(summarize, axis=1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment