Last active
March 29, 2023 23:55
-
-
Save vu3jej/a46eb3d18aa7d8c808af8b8ca4df06a4 to your computer and use it in GitHub Desktop.
Revisions
-
vu3jej revised this gist
Feb 27, 2017. 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing.
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
# Original file line number | Diff line number | Diff line change
# @@ -1,6 +1,6 @@
#
# Usage example: extract colour phrases from a product description.
from colour_extractor import ColourExtractorStrict

# Colour words to search for in the text.
colours = [
    'blue',
    'pink',
    'lavender',
    'heather',
]

# Build the extractor for the colour words above.
extractor = ColourExtractorStrict(colours=colours)

string = 'Available in a variety of colors, including bold blue heather, ebony, jazzberry pink heather, light steel, navy heather, new frosty lavender, plum port or slate heather'

# The return value is not stored or printed here; presumably this snippet is
# meant for an interactive session where the result is echoed.
extractor.get(string=string)
vu3jej created this gist
Feb 27, 2017. There are no files selected for viewing.
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
# Original file line number | Diff line number | Diff line change
# @@ -0,0 +1,54 @@


class ColourExtractorStrict:
    """Extract colours along with the adjectives/nouns that surround them.

    For every token of the input that is one of the configured colours, the
    contiguous run of ``ADJ``/``NOUN`` tokens immediately before and after it
    is glued to the colour, giving phrases such as ``'bold blue heather'``.
    """

    def __init__(self, colours, model=None):
        """Store the colour vocabulary and load the spaCy pipeline.

        Args:
            colours: Iterable of colour words to look for.
            model: Optional name of the spaCy model to load. When omitted,
                the legacy ``'en'`` shortcut is tried first and
                ``'en_core_web_sm'`` is used as a fallback.
        """
        self.colours = colours
        # Part-of-speech tags that may be attached to a colour word.
        self.pos_ok = ['ADJ', 'NOUN']
        self.tagger = self._load_tagger(model)

    @staticmethod
    def _load_tagger(model=None):
        """Return a loaded spaCy pipeline, tolerating the removed 'en' link."""
        # Imported lazily so that importing this module (and using the pure
        # helper methods) does not require spaCy to be installed.
        import spacy

        if model is not None:
            return spacy.load(model)
        try:
            return spacy.load('en')
        except OSError:
            # spaCy v3 dropped shortcut links such as 'en'; fall back to the
            # full package name of the small English model.
            return spacy.load('en_core_web_sm')

    def get(self, string):
        """Return the set of colour phrases found in ``string``.

        Args:
            string: Text to analyse. It is lower-cased before tagging.

        Returns:
            A ``set`` of space-joined phrases, or ``False`` when no colour
            was found.
        """
        # The text is lower-cased before tagging, so the colours must be
        # lower-cased too or mixed-case entries could never match. Building a
        # set once also makes each per-token lookup O(1).
        colours = {colour.lower() for colour in self.colours}

        doc = self.tagger(string.lower())
        pairs = [(word.text, word.pos_) for word in doc]

        extracted = set()
        for index, (text, pos) in enumerate(pairs):
            if text not in colours:
                continue
            # The helpers return False when nothing qualifies; normalise to
            # an empty list so the phrase can be assembled in one step.
            text_ahead = self.look_ahead(pairs=pairs, index=index) or []
            text_behind = self.look_behind(
                pairs=pairs, index=index, colour_pos=pos
            ) or []
            extracted.add(' '.join(text_behind + [text] + text_ahead))

        return extracted if extracted else False

    def look_ahead(self, pairs, index):
        """Collect the run of acceptable tokens right after ``pairs[index]``.

        Args:
            pairs: List of ``(text, pos)`` tuples for the whole document.
            index: Position of the colour token.

        Returns:
            List of token texts in document order, or ``False`` if the token
            following the colour is not an accepted part of speech.
        """
        ahead = []
        for text, pos in pairs[index + 1:]:
            if pos not in self.pos_ok:
                break
            ahead.append(text)
        return ahead or False

    def look_behind(self, pairs, index, colour_pos):
        """Collect the run of acceptable tokens right before ``pairs[index]``.

        Args:
            pairs: List of ``(text, pos)`` tuples for the whole document.
            index: Position of the colour token.
            colour_pos: Part of speech of the colour token. Currently unused;
                kept so existing callers keep working.

        Returns:
            List of token texts in document order, or ``False`` if the token
            preceding the colour is not an accepted part of speech.
        """
        behind = []
        # Walk backwards from the colour until a disallowed tag is hit.
        for text, pos in reversed(pairs[:index]):
            if pos not in self.pos_ok:
                break
            behind.append(text)
        behind.reverse()  # restore document order
        return behind or False

# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Original file line number | Diff line number | Diff line change
# @@ -0,0 +1,6 @@
#
# Usage example: extract colour phrases from a product description.
from colour_extractor import ColourExtractorStrict

# Colour words to search for in the text.
colours = [
    'blue',
    'pink',
    'lavender',
]

# Build the extractor for the colour words above.
extractor = ColourExtractorStrict(colours=colours)

string = 'Available in a variety of colors, including bold blue heather, ebony, jazzberry pink heather, light steel, navy heather, new frosty lavender, plum port or slate heather'

# The return value is not stored or printed here; presumably this snippet is
# meant for an interactive session where the result is echoed.
extractor.get(string=string)