Skip to content

Instantly share code, notes, and snippets.

@vu3jej
Last active March 29, 2023 23:55
Show Gist options
  • Select an option

  • Save vu3jej/a46eb3d18aa7d8c808af8b8ca4df06a4 to your computer and use it in GitHub Desktop.

Select an option

Save vu3jej/a46eb3d18aa7d8c808af8b8ca4df06a4 to your computer and use it in GitHub Desktop.

Revisions

  1. vu3jej revised this gist Feb 27, 2017. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions test.py
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,6 @@
    # Revised usage example: run the extractor over a sample sentence.
    from colour_extractor import ColourExtractorStrict
    # Earlier colour list; it is overwritten by the assignment two lines below.
    colours = ['blue', 'pink', 'lavender']

    # Current colour list, which additionally contains 'heather'.
    colours = ['blue', 'pink', 'lavender', 'heather']
    extractor = ColourExtractorStrict(colours=colours)
    string = 'Available in a variety of colors, including bold blue heather, ebony, jazzberry pink heather, light steel, navy heather, new frosty lavender, plum port or slate heather'
    # The return value is neither stored nor printed here.
    extractor.get(string=string)
  2. vu3jej created this gist Feb 27, 2017.
    54 changes: 54 additions & 0 deletions colour_extractor.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,54 @@
    import spacy


    class ColourExtractorStrict:
        """Extract colour names together with their adjacent modifiers.

        A colour token is expanded with every directly neighbouring token
        (before and after it) whose part-of-speech tag is listed in
        ``pos_ok``, so ``'bold blue heather'`` is returned as one phrase
        rather than just ``'blue'``.
        """

        def __init__(self, colours):
            """Store the colour vocabulary and load the spaCy English pipeline.

            Args:
                colours: iterable of colour names (strings) to look for.

            Raises:
                OSError: if no English spaCy model can be loaded.
            """
            self.colours = colours
            self.pos_ok = ['ADJ', 'NOUN']
            try:
                self.tagger = spacy.load('en')
            except OSError:
                # The 'en' shortcut link is no longer supported by spaCy 3;
                # fall back to the package name the shortcut used to
                # point at.
                self.tagger = spacy.load('en_core_web_sm')

        def get(self, string):
            """Return the set of colour phrases found in ``string``.

            The text is lowercased before tagging, and the colour vocabulary
            is lowercased as well, so matching is case-insensitive on both
            sides.

            Args:
                string: text to scan.

            Returns:
                A set of lowercase, space-joined phrases, or ``False`` when
                no colour was found.
            """
            # Lowercase the vocabulary once per call: the input is lowercased
            # below, so a colour given as 'Blue' would otherwise never match.
            known = {colour.lower() for colour in self.colours}
            doc = self.tagger(string.lower())
            pairs = [(word.text, word.pos_) for word in doc]

            extracted = set()
            for index, (text, pos) in enumerate(pairs):
                if text not in known:
                    continue
                # Both helpers return False when nothing qualifies; treat
                # that as an empty list so the phrase is built in one place.
                behind = self.look_behind(pairs=pairs, index=index,
                                          colour_pos=pos) or []
                ahead = self.look_ahead(pairs=pairs, index=index) or []
                extracted.add(' '.join(behind + [text] + ahead))

            return extracted if extracted else False

        def look_ahead(self, pairs, index):
            """Collect the run of accepted tokens right after ``index``.

            Args:
                pairs: list of ``(text, pos)`` tuples for the whole document.
                index: position of the colour token in ``pairs``.

            Returns:
                The texts of the consecutive tokens following ``index`` whose
                tag is in ``pos_ok``, in document order, or ``False`` if the
                very next token does not qualify (or there is none).
            """
            ahead = []
            for text, pos in pairs[index + 1:]:
                if pos not in self.pos_ok:
                    break
                ahead.append(text)

            return ahead if ahead else False

        def look_behind(self, pairs, index, colour_pos):
            """Collect the run of accepted tokens right before ``index``.

            Args:
                pairs: list of ``(text, pos)`` tuples for the whole document.
                index: position of the colour token in ``pairs``.
                colour_pos: tag of the colour token; currently unused and
                    kept only so existing callers keep working.

            Returns:
                The texts of the consecutive tokens preceding ``index`` whose
                tag is in ``pos_ok``, in document order, or ``False`` if the
                token just before does not qualify (or there is none).
            """
            behind = []
            # Walk backwards from the colour, then restore document order.
            for text, pos in reversed(pairs[:index]):
                if pos not in self.pos_ok:
                    break
                behind.append(text)

            return behind[::-1] if behind else False
    6 changes: 6 additions & 0 deletions test.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,6 @@
    from colour_extractor import ColourExtractorStrict

    # Sample sentence listing several colour phrases separated by commas.
    description = 'Available in a variety of colors, including bold blue heather, ebony, jazzberry pink heather, light steel, navy heather, new frosty lavender, plum port or slate heather'
    # Build the extractor with the three colours to look for.
    extractor = ColourExtractorStrict(colours=['blue', 'pink', 'lavender'])
    # Run the extraction; the result is not stored or printed.
    extractor.get(string=description)