Skip to content

Instantly share code, notes, and snippets.

@liiight
Last active November 3, 2019 21:23
Show Gist options
  • Select an option

  • Save liiight/ccff4ddb199b31163c7a to your computer and use it in GitHub Desktop.

Select an option

Save liiight/ccff4ddb199b31163c7a to your computer and use it in GitHub Desktop.

Revisions

  1. liiight revised this gist Sep 8, 2015. 1 changed file with 44 additions and 36 deletions.
    80 changes: 44 additions & 36 deletions smart_imdb.py
    Original file line number Diff line number Diff line change
    @@ -91,43 +91,51 @@ class SmartIMDB(object):
    movie_queue: add
    """
    genres_schema = {'oneOf': [{
    'type': 'object',
    'properties': {
    'genres': {"type": "array", "items": {"type": "string"}},
    'match_type': {'type': 'string', 'enum': ['any', 'all', 'exact']}
    },
    'required': ['genres'],
    'additionalProperties': False},
    {'type': 'string'}]
    genres_schema = {
    'oneOf': [
    {
    'type': 'object',
    'properties': {
    'genres': {"type": "array", "items": {"type": "string"}},
    'match_type': {'type': 'string', 'enum': ['any', 'all', 'exact']}
    },
    'required': ['genres'],
    'additionalProperties': False},
    {'type': 'string'}
    ]
    }

    job_types = {'type': 'string', 'enum': JOB_TYPES}
    content_types = {'type': 'string', 'enum': CONTENT_TYPES}

    schema = {'oneOf': [{'type': 'string'},
    {'type': 'object',
    'properties': {
    'id': {'type': 'string'},
    'job_types': {'oneOf': [
    {'type': 'array', 'items': job_types},
    job_types
    ]},
    'content_types': {'oneOf': [
    {'type': 'array', 'items': content_types},
    content_types
    ]},
    'include_genres': genres_schema,
    'exclude_genres': genres_schema,
    'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
    'votes': {'type': 'number'},
    'years': {'type': ['string', 'number'], 'format': 'year_format'},
    'max_entries': {'type': 'number'},
    'strict_mode': {'type': 'boolean'}
    },
    'required': ['id'],
    'additionalProperties': False}]
    }
    schema = {
    'oneOf': [
    {'type': 'string'},
    {'type': 'object',
    'properties': {
    'id': {'type': 'string'},
    'job_types': {
    'oneOf': [
    {'type': 'array', 'items': job_types},
    job_types
    ]},
    'content_types': {
    'oneOf': [
    {'type': 'array', 'items': content_types},
    content_types
    ]},
    'include_genres': genres_schema,
    'exclude_genres': genres_schema,
    'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
    'votes': {'type': 'number'},
    'years': {'type': ['string', 'number'], 'format': 'year_format'},
    'max_entries': {'type': 'number'},
    'strict_mode': {'type': 'boolean'}
    },
    'required': ['id'],
    'additionalProperties': False}
    ]
    }

    def entity_type_and_object(self, imdb_id):
    """
    @@ -167,7 +175,7 @@ def items_by_entity(self, entity_type, entity_object, content_types, job_types):

    elif entity_type == 'Person':
    if 'actor' in job_types:
    job_types.append('actress') # Special case: Actress are listed differently than actor
    job_types.append('actress') # Special case: Actress are listed differently than actor
    for job_type in job_types:
    for content_type in content_types:
    job_and_content = job_type + ' ' + content_type
    @@ -366,9 +374,9 @@ def on_task_input(self, task, config):
    else:
    year_test = (item.get('year') >= year_range[0] and (item.get('year') <= year_range[1]))
    log.debug(u'Item year: {0} is in given range of {1} and {2}: {3}'.format(str((item.get('year'))),
    str(year_range[0]),
    str(year_range[1]),
    str(year_test)))
    str(year_range[0]),
    str(year_range[1]),
    str(year_test)))
    else:
    log.debug('No years test required, skipping year test.')
    year_test = True
  2. liiight revised this gist Sep 8, 2015. 1 changed file with 97 additions and 22 deletions.
    119 changes: 97 additions & 22 deletions smart_imdb.py
    Original file line number Diff line number Diff line change
    @@ -30,6 +30,67 @@


    class SmartIMDB(object):
    """
    This plugin enables generating entries based on an entity, an entity being a person, character or company.
    It's based on IMDbPY, which is required (pip install imdbpy). The basic config requires just the IMDB ID of the
    required entity.
    For example:
    smart_imdb: 'http://www.imdb.com/character/ch0001354/?ref_=tt_cl_t1'
    ID format is not important as relevant ID is captured via regex.
    Schema description:
    Other than ID, all other properties are meant to filter the full list that the entity generates.
    id: string that relates to a supported entity type. For example: 'nm0000375'. Required.
    job_types: a string or list with job types from JOB_TYPES. Default is 'actor'.
    content_types: A string or list with content types from CONTENT_TYPES. Default is 'movie'.
    include_genres: A string or list with genres to include when matching a movie. Can also contain match_type,
    which decides the filter type.
    If match_type is 'any', if ANY of the included genres are listed in the filtered movie, it will pass the filter.
    If match_type is 'all', if ALL of the included genres are listed in the filtered movie, it will pass the filter.
    If match_type is 'exact', if EXACTLY all of the included genres are listed in the filtered movie,
    it will pass the filter. Default match_type is 'any'.
    exclude_genres: Exactly like include_genres but relates to which genres the item should not hold.
    rating: A number between 0 and 10 that will be matched against the rating of the movie. If movie rating is higher
    or equal, it will pass the filter.
    votes: A number that will be matched against the votes of the movie. If movie number of votes is higher
    or equal, it will pass the filter.
    years: A string that determines which years to filter. For example:
    2004: If movie year is 2004, it will pass filter.
    2004-: If movie year is 2004 and higher, it will pass filter.
    -2004: If movie year is before 2004, it will pass filter.
    2000-2004: If movie year is between 2000 and 2004, it will pass filter.
    max_entries: The maximum number of entries that can be returned. This value's purpose is basically flood protection
    against unruly configurations that would return too many results. Default is 200.
    strict_mode: A boolean value that determines what to do in case an item does not have year, rating or votes listed
    and the configuration holds any of those. If set to 'True', it will cause an item that does not hold one of
    these properties to fail the filter. Default is 'False'.
    Advanced config example:
    smart_movie_queue:
    smart_imdb:
    id: 'http://www.imdb.com/company/co0051941/?ref_=fn_al_co_2'
    job_types:
    - actor
    - director
    content_types:
    - tv series
    rating: 5.6
    include_genres:
    genres:
    - action
    - comedy
    match_type: any
    exclude_genres: animation
    years: '2005-'
    strict_mode: yes
    accept_all: yes
    movie_queue: add
    """
    genres_schema = {'oneOf': [{
    'type': 'object',
    'properties': {
    @@ -48,13 +109,19 @@ class SmartIMDB(object):
    {'type': 'object',
    'properties': {
    'id': {'type': 'string'},
    'job_types': {'oneOf': [{'type': 'array', 'items': job_types}, job_types]},
    'content_types': {'oneOf': [{'type': 'array', 'items': content_types}, content_types]},
    'job_types': {'oneOf': [
    {'type': 'array', 'items': job_types},
    job_types
    ]},
    'content_types': {'oneOf': [
    {'type': 'array', 'items': content_types},
    content_types
    ]},
    'include_genres': genres_schema,
    'exclude_genres': genres_schema,
    'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
    'votes': {'type': 'number'},
    'years': {'type': 'string', 'format': 'year_format'},
    'years': {'type': ['string', 'number'], 'format': 'year_format'},
    'max_entries': {'type': 'number'},
    'strict_mode': {'type': 'boolean'}
    },
    @@ -100,7 +167,7 @@ def items_by_entity(self, entity_type, entity_object, content_types, job_types):

    elif entity_type == 'Person':
    if 'actor' in job_types:
    job_types.append('actress')
    job_types.append('actress') # Special case: Actress are listed differently than actor
    for job_type in job_types:
    for content_type in content_types:
    job_and_content = job_type + ' ' + content_type
    @@ -109,7 +176,7 @@ def items_by_entity(self, entity_type, entity_object, content_types, job_types):
    if movies_by_job_type:
    for movie in movies_by_job_type:
    if movie not in movies:
    log.verbose('Found item: ' + movie.get('title') + ' ,adding to raw list')
    log.verbose('Found item: ' + movie.get('title') + ' ,adding to unfiltered list')
    movies.append(movie)
    else:
    log.debug('Movie ' + str(movie) + ' already found in list, skipping.')
    @@ -229,6 +296,9 @@ def prepare_config(self, config):
    config['exclude_genres'] = {'genres': [config.get('exclude_genres')]}
    config['exclude_genres'].setdefault('match_type', 'any')

    if config.get('years'):
    config['years'] = str(config.get('years'))

    return config

    def on_task_input(self, task, config):
    @@ -239,9 +309,14 @@ def on_task_input(self, task, config):
    include_list = config.get('include_genres')
    exclude_list = config.get('exclude_genres')

    entity_type, entity_object = self.entity_type_and_object(config.get('id'))
    try:
    entity_type, entity_object = self.entity_type_and_object(config.get('id'))
    except Exception as e:
    log.error('Could not resolve entity via ID. Either error in config or unsupported entity: %s' % e)
    return

    items = self.items_by_entity(entity_type, entity_object,
    config.get('content_types', []), config.get('job_types', []))
    config.get('content_types'), config.get('job_types'))

    if not items:
    log.error('Could not get IMDB item list, check your configuration.')
    @@ -254,27 +329,27 @@ def on_task_input(self, task, config):
    try:
    ia.update(item)
    except Exception as e:
    log.error('An error has occurred, cannot get movie data: %s' % e)
    log.error('An error has occurred, cannot get item data: %s' % e)
    continue

    log.debug(
    'Testing if movie: ' + item.get('long imdb canonical title') + ' qualifies for adding to entries.')
    'Testing if item: ' + item.get('long imdb canonical title') + ' qualifies for adding to entries.')

    type_test = item.get('kind') in config.get('content_types')
    log.debug('Movie kind: ' + item.get('kind') + ' found in config types ' +
    log.debug('Item kind: ' + item.get('kind') + ' found in config types ' +
    ', '.join(config.get('content_types')) + ': ' + str(type_test))

    if config.get('rating'):
    if not item.get('rating'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have rating value, skipping movie.')
    log.debug('Strict mode: Item does not have rating value, skipping movie.')
    rating_test = False
    else:
    log.debug('Movie does not have rating listed, skipping test.')
    log.debug('Item does not have rating listed, skipping test.')
    rating_test = True
    else:
    rating_test = float(item.get('rating')) >= config.get('rating', 1)
    log.debug('Movie rating: ' + str(item.get('rating')) +
    log.debug('Item rating: ' + str(item.get('rating')) +
    ' is higher or equal to: ' + str(config.get('rating', '1')) + ': ' + str(rating_test))
    else:
    log.debug('No rating test required, skipping rating test.')
    @@ -283,14 +358,14 @@ def on_task_input(self, task, config):
    if config.get('years'):
    if not item.get('year'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have year value, skipping movie.')
    log.debug('Strict mode: Item does not have year value, skipping movie.')
    year_test = False
    else:
    log.debug('Movie does not have year listed, skipping test.')
    log.debug('Item does not have year listed, skipping test.')
    year_test = True
    else:
    year_test = (item.get('year') >= year_range[0] and (item.get('year') <= year_range[1]))
    log.debug(u'Movie year: {0} is in given range of {1} and {2}: {3}'.format(str((item.get('year'))),
    log.debug(u'Item year: {0} is in given range of {1} and {2}: {3}'.format(str((item.get('year'))),
    str(year_range[0]),
    str(year_range[1]),
    str(year_test)))
    @@ -301,14 +376,14 @@ def on_task_input(self, task, config):
    if config.get('votes'):
    if not item.get('votes'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have votes value, skipping movie.')
    log.debug('Strict mode: Item does not have votes value, skipping movie.')
    votes_test = False
    else:
    log.debug('Movie does not have votes listed, skipping test.')
    log.debug('Item does not have votes listed, skipping test.')
    votes_test = True
    else:
    votes_test = int(item.get('votes')) >= config.get('votes', 1)
    log.debug('Movie votes: ' + str(item.get('votes')) +
    log.debug('Item votes: ' + str(item.get('votes')) +
    ' are higher or equal to: ' + str(config.get('votes', '1')) + ': ' + str(votes_test))
    else:
    log.debug('No votes test required, skipping votes test.')
    @@ -349,9 +424,9 @@ def on_task_input(self, task, config):
    if len(entries) <= config.get('max_entries'):
    return entries
    else:
    log.error(
    'Number of entries (%s) exceeds maximum allowed value %s. '
    'Edit your filters or raise the maximum value by entering a higher "max_entries"' % (
    log.warning(
    'Number of entries (%s) exceeds maximum allowed value %s. \
    Edit your filters or raise the maximum value by entering a higher "max_entries"' % (
    len(entries), config.get('max_entries')))

    return
  3. liiight revised this gist Sep 7, 2015. 1 changed file with 25 additions and 4 deletions.
    29 changes: 25 additions & 4 deletions smart_imdb.py
    Original file line number Diff line number Diff line change
    @@ -21,22 +21,24 @@
    YEAR_FORMATS = [r'^((19|20)\d{2})$', r'^\-((19|20)\d{2})$', r'^((19|20)\d{2})\-$', r'^((19|20)\d{2})\-((19|20)\d{2}$)']
    ENTITIES_FORMATS = {
    'Person': r'nm(\d{7})',
    'Company': r'co(\d{7})'
    'Company': r'co(\d{7})',
    'Character': r'ch(\d{7})'
    }

    MIN_YEAR_RANGE = 1900
    MAX_YEAR_RANGE = 2099


    class SmartIMDB(object):
    genres_schema = {
    genres_schema = {'oneOf': [{
    'type': 'object',
    'properties': {
    'genres': {"type": "array", "items": {"type": "string"}},
    'match_type': {'type': 'string', 'enum': ['any', 'all', 'exact'], 'default': 'any'}
    'match_type': {'type': 'string', 'enum': ['any', 'all', 'exact']}
    },
    'required': ['genres'],
    'additionalProperties': False
    'additionalProperties': False},
    {'type': 'string'}]
    }

    job_types = {'type': 'string', 'enum': JOB_TYPES}
    @@ -75,6 +77,9 @@ def entity_type_and_object(self, imdb_id):
    elif imdb_entity_type == 'Company':
    log.info('Starting to retrieve items for company: %s' % ia.get_company(m.group(1)))
    return imdb_entity_type, ia.get_company(m.group(1))
    elif imdb_entity_type == 'Character':
    log.info('Starting to retrieve items for Character: %s' % ia.get_character(m.group(1)))
    return imdb_entity_type, ia.get_character(m.group(1))

    def items_by_entity(self, entity_type, entity_object, content_types, job_types):
    """
    @@ -90,6 +95,9 @@ def items_by_entity(self, entity_type, entity_object, content_types, job_types):
    if entity_type == 'Company':
    return entity_object.get('production companies')

    if entity_type == 'Character':
    return entity_object.get('feature')

    elif entity_type == 'Person':
    if 'actor' in job_types:
    job_types.append('actress')
    @@ -197,6 +205,7 @@ def prepare_config(self, config):
    """
    if not isinstance(config, dict):
    config = {'id': config}

    config.setdefault('content_types', [CONTENT_TYPES[0]])
    config.setdefault('job_types', [JOB_TYPES[0]])
    config.setdefault('max_entries', 200)
    @@ -205,9 +214,21 @@ def prepare_config(self, config):
    if isinstance(config.get('content_types'), str_types):
    log.debug('Converted content type from string to list.')
    config['content_types'] = [config['content_types']]

    if isinstance(config['job_types'], str_types):
    log.debug('Converted job type from string to list.')
    config['job_types'] = [config['job_types']]

    if config.get('include_genres'):
    if isinstance(config.get('include_genres'), str_types):
    config['include_genres'] = {'genres': [config.get('include_genres')]}
    config['include_genres'].setdefault('match_type', 'any')

    if config.get('exclude_genres'):
    if isinstance(config.get('exclude_genres'), str_types):
    config['exclude_genres'] = {'genres': [config.get('exclude_genres')]}
    config['exclude_genres'].setdefault('match_type', 'any')

    return config

    def on_task_input(self, task, config):
  4. liiight revised this gist Sep 7, 2015. 1 changed file with 129 additions and 118 deletions.
    247 changes: 129 additions & 118 deletions smart_imdb.py
    Original file line number Diff line number Diff line change
    @@ -18,8 +18,11 @@
    'editor', 'miscellaneous', 'editorial department', 'cinematographer',
    'visual effects', 'thanks', 'music department']
    CONTENT_TYPES = ['movie', 'tv series', 'tv mini series', 'video game', 'video movie', 'tv movie', 'episode']
    # TODO Improve year regex detection
    YEAR_FORMATS = [r'^((19|20)\d{2})$', r'^\-((19|20)\d{2})$', r'^((19|20)\d{2})\-$', r'^((19|20)\d{2})\-((19|20)\d{2}$)']
    ENTITIES_FORMATS = {
    'Person': r'nm(\d{7})',
    'Company': r'co(\d{7})'
    }

    MIN_YEAR_RANGE = 1900
    MAX_YEAR_RANGE = 2099
    @@ -36,98 +39,82 @@ class SmartIMDB(object):
    'additionalProperties': False
    }

    person_schema = {
    'type': 'object',
    'properties': {
    'name': {'type': 'string'},
    'id': {'type': 'string'},
    'job_types': {'type': 'array', 'items': {'type': 'string', 'enum': JOB_TYPES}, 'default': ['actor']},
    'content_types': {'type': 'array', 'items': {'type': 'string', 'enum': CONTENT_TYPES}, 'default': ['movie']}
    },
    'oneOf': [{'required': ['name']}, {'required': ['id']}],
    'error_oneOf': 'Either a name of ID are required.',
    'additionalProperties': False
    }

    schema = {
    'type': 'object',
    'properties': {
    'person': person_schema,
    'include_genres': genres_schema,
    'exclude_genres': genres_schema,
    'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
    'votes': {'type': 'number'},
    'years': {'type': 'string', 'format': 'year_format'},
    'max_entries': {'type': 'number', 'default': 200},
    'strict_mode': {'type': 'boolean', 'default': False}
    },
    'required': ['person'],
    'additionalProperties': False

    }

    @staticmethod
    def is_imdb_id(string):
    job_types = {'type': 'string', 'enum': JOB_TYPES}
    content_types = {'type': 'string', 'enum': CONTENT_TYPES}

    schema = {'oneOf': [{'type': 'string'},
    {'type': 'object',
    'properties': {
    'id': {'type': 'string'},
    'job_types': {'oneOf': [{'type': 'array', 'items': job_types}, job_types]},
    'content_types': {'oneOf': [{'type': 'array', 'items': content_types}, content_types]},
    'include_genres': genres_schema,
    'exclude_genres': genres_schema,
    'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
    'votes': {'type': 'number'},
    'years': {'type': 'string', 'format': 'year_format'},
    'max_entries': {'type': 'number'},
    'strict_mode': {'type': 'boolean'}
    },
    'required': ['id'],
    'additionalProperties': False}]
    }

    def entity_type_and_object(self, imdb_id):
    """
    Checks if a given string matches IMDB id format
    :param string: Supposed IMDB ID
    :return: True if it matches
    Return a tuple of entity type and entity object
    :param imdb_id: string which contains IMDB id
    :return: entity type, entity object (person, company, etc.)
    """
    return re.search('[a-zA-Z]{2}(\d{7})', string)

    def get_all_movies_by_person(self, person):
    for imdb_entity_type, imdb_entity_format in ENTITIES_FORMATS.items():
    m = re.search(imdb_entity_format, imdb_id)
    if m:
    if imdb_entity_type == 'Person':
    log.info('Starting to retrieve items for person: %s' % ia.get_person(m.group(1)))
    return imdb_entity_type, ia.get_person(m.group(1))
    elif imdb_entity_type == 'Company':
    log.info('Starting to retrieve items for company: %s' % ia.get_company(m.group(1)))
    return imdb_entity_type, ia.get_company(m.group(1))

    def items_by_entity(self, entity_type, entity_object, content_types, job_types):
    """
    Gets a person object from config and return all movies according to parameters
    :param person:
    :return: Movie list
    Gets entity object and return movie list
    :param entity_type: Person, company, etc.
    :param entity_object: The object itself
    :param content_types: as defined in config
    :param job_types: As defined in config
    :return:
    """
    # TODO split this to (at least) two methods
    movies = []

    if person.get('id'):
    if self.is_imdb_id(person.get('id')):
    log.debug('Detected IMDB ID %s, resolving person.' % person.get('id'))
    m = self.is_imdb_id(person.get('id'))
    IMDB_person = ia.get_person(m.group(1))
    else:
    log.error('Could not resolve person from IMDB ID %s' % person.get('id'))
    return
    else:
    log.debug('Trying to search for person: %s' % person.get('name'))
    person_list = ia.search_person(person.get('name'))
    if person_list:
    IMDB_person = ia.get_person(person_list[0].personID)
    else:
    return

    # Special case: Actress and actor are different roles.
    if 'actor' in person.get('job_types'):
    person['job_types'].append('actress')

    log.info('Getting movies for %s with the following job types: %s' %
    (IMDB_person['name'], ' ,'.join(person.get('job_types'))))
    for job_type in person['job_types']:
    for content_type in person.get('content_types'):
    job_and_content = job_type + ' ' + content_type
    log.debug('Searching for movies that correlates to: ' + job_and_content)
    movies_by_job_type = IMDB_person.get(job_and_content, IMDB_person.get(job_type))
    if movies_by_job_type:
    for movie in movies_by_job_type:
    if movie not in movies:
    log.debug('Adding movie: ' + movie.get('title'))
    movies.append(movie)
    else:
    log.debug('Movie ' + str(movie) + ' already found in list, skipping.')
    return movies

    def parse_year(self, year=None):
    if entity_type == 'Company':
    return entity_object.get('production companies')

    elif entity_type == 'Person':
    if 'actor' in job_types:
    job_types.append('actress')
    for job_type in job_types:
    for content_type in content_types:
    job_and_content = job_type + ' ' + content_type
    log.debug('Searching for movies that correlates to: ' + job_and_content)
    movies_by_job_type = entity_object.get(job_and_content, entity_object.get(job_type))
    if movies_by_job_type:
    for movie in movies_by_job_type:
    if movie not in movies:
    log.verbose('Found item: ' + movie.get('title') + ' ,adding to raw list')
    movies.append(movie)
    else:
    log.debug('Movie ' + str(movie) + ' already found in list, skipping.')
    return movies

    def parse_year(self, year):
    """
    Receives 'year_range_format' string and parses it to return start and end dates
    :param year:
    :return: Tuple with start and end year. Uses max and min year values if either one is needed
    """
    if not year:
    log.debug('No year filter in config, returning defaults')
    log.debug('No year filter in config, returning defaults.')
    return MIN_YEAR_RANGE, MAX_YEAR_RANGE

    for i in range(len(YEAR_FORMATS)):
    @@ -151,7 +138,7 @@ def parse_year(self, year=None):
    def clean_list(self, genres_list):
    """
    Gets a list and returns a new list with lowercase elements
    :param list:
    :param genres_list: List of genres to clean
    :return:
    """
    if not isinstance(genres_list, list):
    @@ -167,7 +154,8 @@ def genres_match(self, user_genres, movie_genres):
    Takes a genres object from config schema and tries to match it with movie genres according to match type.
    If match type is 'any', will return true if any of the user specified genres exists in the movie genres list.
    if match type is 'all' will return true if all of the user specified genres exists in the movie genres list.
    if match type is 'exact' will return true if exactly all of the user specified genres exists in the movie genres list.
    if match type is 'exact' will return true if exactly all of the user specified genres exists in the movie genres
    list.
    :param user_genres: Genres from config
    :param movie_genres: Movie genres
    :return:
    @@ -178,8 +166,8 @@ def genres_match(self, user_genres, movie_genres):
    user_genres_list = self.clean_list(user_genres.get('genres'))
    movie_genres_list = self.clean_list(movie_genres)
    match_type = user_genres.get('match_type')
    log.debug('Matching user genres: ' + ', '.join(user_genres_list) + ' with movie genres: '
    + ', '.join(movie_genres_list) + ' and match type: ' + match_type)
    log.debug('Matching user genres: ' + ', '.join(user_genres_list) + ' with movie genres: ' +
    ', '.join(movie_genres_list) + ' and match type: ' + match_type)

    if match_type == 'any':
    return bool(
    @@ -203,106 +191,129 @@ def is_year_format(instance):
    raise ValueError('Invalid year format, or years out of range of %d to %d. Please check config.'
    % (MIN_YEAR_RANGE, MAX_YEAR_RANGE))

    def prepare_config(self, config):
    """
    Converts config to dict form and sets defaults if needed
    """
    if not isinstance(config, dict):
    config = {'id': config}
    config.setdefault('content_types', [CONTENT_TYPES[0]])
    config.setdefault('job_types', [JOB_TYPES[0]])
    config.setdefault('max_entries', 200)
    config.setdefault('strict_mode', False)

    if isinstance(config.get('content_types'), str_types):
    log.debug('Converted content type from string to list.')
    config['content_types'] = [config['content_types']]
    if isinstance(config['job_types'], str_types):
    log.debug('Converted job type from string to list.')
    config['job_types'] = [config['job_types']]
    return config

    def on_task_input(self, task, config):
    entries = []

    person = config.get('person')
    config = self.prepare_config(config)

    include_list = config.get('include_genres')
    exclude_list = config.get('exclude_genres')
    movies = self.get_all_movies_by_person(person)

    if not movies:
    log.error('Could not get movie list, check your configuration.')
    entity_type, entity_object = self.entity_type_and_object(config.get('id'))
    items = self.items_by_entity(entity_type, entity_object,
    config.get('content_types', []), config.get('job_types', []))

    if not items:
    log.error('Could not get IMDB item list, check your configuration.')
    return

    year_range = self.parse_year(config.get('years'))
    log.info('Retrieved %d movies, starting to filter.' % len(movies))
    log.info('Retrieved %d items, starting to filter list.' % len(items))

    for movie in movies:
    for item in items:
    try:
    ia.update(movie)
    ia.update(item)
    except Exception as e:
    log.error('An error has occurred, cannot get movie data: %s' % e)
    continue

    log.debug(
    'Testing if movie: ' + movie.get('long imdb canonical title') + ' qualifies for adding to entries.')
    'Testing if movie: ' + item.get('long imdb canonical title') + ' qualifies for adding to entries.')

    type_test = movie.get('kind') in person.get('content_types')
    log.debug('Movie kind: ' + movie.get('kind') + ' found in config types ' +
    ', '.join(person.get('content_types')) + ': ' + str(type_test))
    type_test = item.get('kind') in config.get('content_types')
    log.debug('Movie kind: ' + item.get('kind') + ' found in config types ' +
    ', '.join(config.get('content_types')) + ': ' + str(type_test))

    if config.get('rating'):
    if not movie.get('rating'):
    if not item.get('rating'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have rating value, skipping movie.')
    rating_test = False
    else:
    log.debug('Movie does not have rating listed, skipping test.')
    rating_test = True
    else:
    rating_test = float(movie.get('rating')) >= config.get('rating', 1)
    log.debug('Movie rating: ' + str(movie.get('rating')) +
    rating_test = float(item.get('rating')) >= config.get('rating', 1)
    log.debug('Movie rating: ' + str(item.get('rating')) +
    ' is higher or equal to: ' + str(config.get('rating', '1')) + ': ' + str(rating_test))
    else:
    log.debug('No rating test required, skipping rating test.')
    rating_test = True

    if config.get('years'):
    if not movie.get('year'):
    if not item.get('year'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have year value, skipping movie.')
    year_test = False
    else:
    log.debug('Movie does not have year listed, skipping test.')
    year_test = True
    else:
    year_test = (movie.get('year') >= year_range[0] and (movie.get('year') <= year_range[1]))
    log.debug('Movie year: ' + str((movie.get('year'))) + ' is in given range of '
    + str(year_range[0]) + ' and ' + str(year_range[1]) + ': ' + str(year_test))
    year_test = (item.get('year') >= year_range[0] and (item.get('year') <= year_range[1]))
    log.debug(u'Movie year: {0} is in given range of {1} and {2}: {3}'.format(str((item.get('year'))),
    str(year_range[0]),
    str(year_range[1]),
    str(year_test)))
    else:
    log.debug('No years test required, skipping year test.')
    year_test = True

    if config.get('votes'):
    if not movie.get('votes'):
    if not item.get('votes'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have votes value, skipping movie.')
    votes_test = False
    else:
    log.debug('Movie does not have votes listed, skipping test.')
    votes_test = True
    else:
    votes_test = int(movie.get('votes')) >= config.get('votes', 1)
    log.debug('Movie votes: ' + str(movie.get('votes')) +
    votes_test = int(item.get('votes')) >= config.get('votes', 1)
    log.debug('Movie votes: ' + str(item.get('votes')) +
    ' are higher or equal to: ' + str(config.get('votes', '1')) + ': ' + str(votes_test))
    else:
    log.debug('No votes test required, skipping votes test.')
    votes_test = True

    if exclude_list:
    exclude_test = not self.genres_match(exclude_list, movie.get('genres', []))
    log.debug('Exclude genres: ' + ', '.join(
    exclude_list.get('genres', [])) + ' with match type ' + exclude_list.get(
    'match_type') + ' are not found in movie genres: ' + ', '.join(movie.get('genres', [])) +
    ': ' + str(exclude_test))
    exclude_test = not self.genres_match(exclude_list, item.get('genres', []))
    log.debug('Exclude genres: ' + ', '.join(exclude_list.get('genres', [])) + ' with match type ' +
    exclude_list.get('match_type') + ' are not found in item genres: ' +
    ', '.join(item.get('genres', [])) + ': ' + str(exclude_test))
    else:
    log.debug('No genres exclude test required, skipping exclude list test.')
    exclude_test = True

    if include_list:
    include_test = self.genres_match(include_list, movie.get('genres', []))
    log.debug('Include genres: ' + ', '.join(
    include_list.get('genres', [])) + ' with match type ' + include_list.get(
    'match_type') + ' are found in movie genres: ' + ', '.join(movie.get('genres', [])) +
    ': ' + str(include_test))
    include_test = self.genres_match(include_list, item.get('genres', []))
    log.debug('Include genres: ' + ', '.join(include_list.get('genres', [])) +
    ' with match type ' + include_list.get('match_type') + ' are found in item genres: ' +
    ', '.join(item.get('genres', [])) + ': ' + str(include_test))
    else:
    log.debug('No genres include test required, skipping include list test.')
    include_test = True

    if type_test and rating_test and year_test and votes_test and exclude_test and include_test:
    entry = Entry(title=movie['title'],
    imdb_id='tt' + ia.get_imdbID(movie),
    entry = Entry(title=item['title'],
    imdb_id='tt' + ia.get_imdbID(item),
    url='')
    if entry.isvalid():
    if entry not in entries:
  5. liiight revised this gist Sep 2, 2015. 1 changed file with 30 additions and 30 deletions.
    60 changes: 30 additions & 30 deletions smart_imdb.py
    Original file line number Diff line number Diff line change
    @@ -2,7 +2,6 @@

    import logging
    import re
    from imdb._exceptions import IMDbDataAccessError
    from jsonschema.compat import str_types
    from flexget import plugin
    from flexget.event import event
    @@ -42,7 +41,7 @@ class SmartIMDB(object):
    'properties': {
    'name': {'type': 'string'},
    'id': {'type': 'string'},
    'jobtypes': {'type': 'array', 'items': {'type': 'string', 'enum': JOB_TYPES}, 'default': ['actor']},
    'job_types': {'type': 'array', 'items': {'type': 'string', 'enum': JOB_TYPES}, 'default': ['actor']},
    'content_types': {'type': 'array', 'items': {'type': 'string', 'enum': CONTENT_TYPES}, 'default': ['movie']}
    },
    'oneOf': [{'required': ['name']}, {'required': ['id']}],
    @@ -100,26 +99,26 @@ def get_all_movies_by_person(self, person):
    IMDB_person = ia.get_person(person_list[0].personID)
    else:
    return

    if person.get('jobtypes') == ['actor']: # Special case, since 'actor' and 'actress' are separate keys in object
    log.info('Getting movies for actor/actress: %s' % IMDB_person['name'])
    return IMDB_person.get('actor', IMDB_person.get('actress'))
    else:
    log.info('Getting movies for %s with the following jobtypes: %s' %
    (IMDB_person['name'], ' ,'.join(person.get('jobtypes'))))
    for jobtype in person['jobtypes']:
    for content_type in person.get('content_types'):
    job_and_content = jobtype + ' ' + content_type
    log.debug('Searching for movies that correlates to: ' + job_and_content)
    movies_by_jobtype = IMDB_person.get(job_and_content, IMDB_person.get(jobtype))
    if movies_by_jobtype:
    for movie in movies_by_jobtype:
    if movie not in movies:
    log.debug('Adding movie: ' + str(movie))
    movies.append(movie)
    else:
    log.debug('Movie ' + str(movie) + ' already found in list, skipping.')
    return movies
    # Special case: Actress and actor are different roles.
    if 'actor' in person.get('job_types'):
    person['job_types'].append('actress')

    log.info('Getting movies for %s with the following job types: %s' %
    (IMDB_person['name'], ' ,'.join(person.get('job_types'))))
    for job_type in person['job_types']:
    for content_type in person.get('content_types'):
    job_and_content = job_type + ' ' + content_type
    log.debug('Searching for movies that correlates to: ' + job_and_content)
    movies_by_job_type = IMDB_person.get(job_and_content, IMDB_person.get(job_type))
    if movies_by_job_type:
    for movie in movies_by_job_type:
    if movie not in movies:
    log.debug('Adding movie: ' + movie.get('title'))
    movies.append(movie)
    else:
    log.debug('Movie ' + str(movie) + ' already found in list, skipping.')
    return movies

    def parse_year(self, year=None):
    """
    @@ -169,8 +168,8 @@ def genres_match(self, user_genres, movie_genres):
    If match type is 'any', will return true if any of the user specified genres exists in the movie genres list.
    if match type is 'all' will return true if all of the user specified genres exists in the movie genres list.
    if match type is 'exact' will return true if exactly all of the user specified genres exists in the movie genres list.
    :param user_genres:
    :param movie_genres:
    :param user_genres: Genres from config
    :param movie_genres: Movie genres
    :return:
    """
    if not bool(user_genres):
    @@ -179,6 +178,8 @@ def genres_match(self, user_genres, movie_genres):
    user_genres_list = self.clean_list(user_genres.get('genres'))
    movie_genres_list = self.clean_list(movie_genres)
    match_type = user_genres.get('match_type')
    log.debug('Matching user genres: ' + ', '.join(user_genres_list) + ' with movie genres: '
    + ', '.join(movie_genres_list) + ' and match type: ' + match_type)

    if match_type == 'any':
    return bool(
    @@ -202,7 +203,6 @@ def is_year_format(instance):
    raise ValueError('Invalid year format, or years out of range of %d to %d. Please check config.'
    % (MIN_YEAR_RANGE, MAX_YEAR_RANGE))


    def on_task_input(self, task, config):
    entries = []

    @@ -229,8 +229,8 @@ def on_task_input(self, task, config):
    'Testing if movie: ' + movie.get('long imdb canonical title') + ' qualifies for adding to entries.')

    type_test = movie.get('kind') in person.get('content_types')
    log.debug('Movie kind: ' + movie.get('kind') + ' found in config types '
    + ', '.join(person.get('content_types')) + ': ' + str(type_test))
    log.debug('Movie kind: ' + movie.get('kind') + ' found in config types ' +
    ', '.join(person.get('content_types')) + ': ' + str(type_test))

    if config.get('rating'):
    if not movie.get('rating'):
    @@ -258,7 +258,7 @@ def on_task_input(self, task, config):
    year_test = True
    else:
    year_test = (movie.get('year') >= year_range[0] and (movie.get('year') <= year_range[1]))
    log.info('Movie year: ' + str((movie.get('year'))) + ' is in given range of '
    log.debug('Movie year: ' + str((movie.get('year'))) + ' is in given range of '
    + str(year_range[0]) + ' and ' + str(year_range[1]) + ': ' + str(year_test))
    else:
    log.debug('No years test required, skipping year test.')
    @@ -281,7 +281,7 @@ def on_task_input(self, task, config):
    votes_test = True

    if exclude_list:
    exclude_test = not self.genres_match(exclude_list, movie.get('genres', ''))
    exclude_test = not self.genres_match(exclude_list, movie.get('genres', []))
    log.debug('Exclude genres: ' + ', '.join(
    exclude_list.get('genres', [])) + ' with match type ' + exclude_list.get(
    'match_type') + ' are not found in movie genres: ' + ', '.join(movie.get('genres', [])) +
    @@ -291,7 +291,7 @@ def on_task_input(self, task, config):
    exclude_test = True

    if include_list:
    include_test = self.genres_match(include_list, movie.get('genres', ''))
    include_test = self.genres_match(include_list, movie.get('genres', []))
    log.debug('Include genres: ' + ', '.join(
    include_list.get('genres', [])) + ' with match type ' + include_list.get(
    'match_type') + ' are found in movie genres: ' + ', '.join(movie.get('genres', [])) +
  6. liiight revised this gist Sep 2, 2015. 1 changed file with 195 additions and 76 deletions.
    271 changes: 195 additions & 76 deletions smart_imdb.py
    Original file line number Diff line number Diff line change
    @@ -3,11 +3,11 @@
    import logging
    import re
    from imdb._exceptions import IMDbDataAccessError
    from jsonschema.compat import str_types
    from flexget import plugin
    from flexget.event import event
    from flexget.entry import Entry


    from imdb import IMDb
    from flexget.config_schema import format_checker

    @@ -19,20 +19,31 @@
    'editor', 'miscellaneous', 'editorial department', 'cinematographer',
    'visual effects', 'thanks', 'music department']
    CONTENT_TYPES = ['movie', 'tv series', 'tv mini series', 'video game', 'video movie', 'tv movie', 'episode']
    YEAR_FORMATS = [r'(\d{4)}$', r'\-(\d{4})$', r'(\d{4})\-$', r'(\d{4})\-(\d{4})$']
    MIN_YEAR_RANGE = 1700
    MAX_YEAR_RANGE = 3000
    # TODO Improve year regex detection
    YEAR_FORMATS = [r'^((19|20)\d{2})$', r'^\-((19|20)\d{2})$', r'^((19|20)\d{2})\-$', r'^((19|20)\d{2})\-((19|20)\d{2}$)']

    MIN_YEAR_RANGE = 1900
    MAX_YEAR_RANGE = 2099


    class SmartIMDB(object):
    genres_schema = {
    'type': 'object',
    'properties': {
    'genres': {"type": "array", "items": {"type": "string"}},
    'match_type': {'type': 'string', 'enum': ['any', 'all', 'exact'], 'default': 'any'}
    },
    'required': ['genres'],
    'additionalProperties': False
    }

    entity_schema = {
    person_schema = {
    'type': 'object',
    'properties': {
    'name': {'type': 'string'},
    'id': {'type': 'string'},
    'jobtypes': {'type': 'array', 'items': {'type': 'string', 'enum': JOB_TYPES, 'default': 'actor'}},
    'content_types': {'type': 'array', 'items': {'type': 'string', 'enum': CONTENT_TYPES, 'default': 'movie'}}
    'jobtypes': {'type': 'array', 'items': {'type': 'string', 'enum': JOB_TYPES}, 'default': ['actor']},
    'content_types': {'type': 'array', 'items': {'type': 'string', 'enum': CONTENT_TYPES}, 'default': ['movie']}
    },
    'oneOf': [{'required': ['name']}, {'required': ['id']}],
    'error_oneOf': 'Either a name of ID are required.',
    @@ -42,12 +53,12 @@ class SmartIMDB(object):
    schema = {
    'type': 'object',
    'properties': {
    'person': entity_schema,
    'include_genres': {"type": "array", "items": {"type": "string"}},
    'exclude_genres': {"type": "array", "items": {"type": "string"}},
    'rating': {'type': 'number'},
    'person': person_schema,
    'include_genres': genres_schema,
    'exclude_genres': genres_schema,
    'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
    'votes': {'type': 'number'},
    'years': {'type': 'string'},
    'years': {'type': 'string', 'format': 'year_format'},
    'max_entries': {'type': 'number', 'default': 200},
    'strict_mode': {'type': 'boolean', 'default': False}
    },
    @@ -66,15 +77,21 @@ def is_imdb_id(string):
    return re.search('[a-zA-Z]{2}(\d{7})', string)

    def get_all_movies_by_person(self, person):
    """
    Gets a person object from config and return all movies according to parameters
    :param person:
    :return: Movie list
    """
    # TODO split this to (at least) two methods
    movies = []

    if person.get('id'):
    if self.is_imdb_id(person.get('id')):
    log.info('Detected IMDB ID %s, resolving person.' % person.get('id'))
    log.debug('Detected IMDB ID %s, resolving person.' % person.get('id'))
    m = self.is_imdb_id(person.get('id'))
    IMDB_person = ia.get_person(m.group(1))
    if not person:
    log.warning('Could not resolve person from IMDB ID %s' % person.get('id'))
    else:
    log.error('Could not resolve person from IMDB ID %s' % person.get('id'))
    return
    else:
    log.debug('Trying to search for person: %s' % person.get('name'))
    @@ -84,121 +101,223 @@ def get_all_movies_by_person(self, person):
    else:
    return

    if not person.get('jobtypes'): # Special case, since 'actor' and 'actress' are separate keys in object
    log.debug('Getting movies for actor/actress: %s' % IMDB_person['name'])
    if person.get('jobtypes') == ['actor']: # Special case, since 'actor' and 'actress' are separate keys in object
    log.info('Getting movies for actor/actress: %s' % IMDB_person['name'])
    return IMDB_person.get('actor', IMDB_person.get('actress'))
    else:
    log.debug('Getting movies for %s with the following jobtypes: %s' %
    (IMDB_person['name'], ' ,'.join(person.get('jobtypes'))))
    log.info('Getting movies for %s with the following jobtypes: %s' %
    (IMDB_person['name'], ' ,'.join(person.get('jobtypes'))))
    for jobtype in person['jobtypes']:
    for content_type in person.get('content_types'):
    job_and_content = jobtype + ' ' + content_type
    log.debug('Searching for movies that correlates to: ' + job_and_content)
    movies_by_jobtype = IMDB_person.get(job_and_content, IMDB_person.get(jobtype))
    if movies_by_jobtype:
    movies += movies_by_jobtype
    for movie in movies_by_jobtype:
    if movie not in movies:
    log.debug('Adding movie: ' + str(movie))
    movies.append(movie)
    else:
    log.debug('Movie ' + str(movie) + ' already found in list, skipping.')
    return movies

    def parse_year(self, year):
    def parse_year(self, year=None):
    """
    Receives 'year_range_format' string and parses it to return start and end dates
    :param year:
    :return: Tuple with start and end year. Uses max an min year values if either one is needed
    """
    if not year:
    log.debug('No year filter in config, returning defaults')
    return MIN_YEAR_RANGE, MAX_YEAR_RANGE

    for i in range(len(YEAR_FORMATS)):
    m = re.match(YEAR_FORMATS[i], year)
    if m:
    if i == 0:
    log.debug('Matched year regex group ' + str(i))
    return int(m.group(1)), int(m.group(1))
    elif i == 1:
    log.debug('Matched year regex group ' + str(i))
    return MIN_YEAR_RANGE, int(m.group(1))
    elif i == 2:
    log.debug('Matched year regex group ' + str(i))
    return int(m.group(1)), MAX_YEAR_RANGE
    elif i == 3:
    return int(m.group(1)), int(m.group(2))
    else:
    return MIN_YEAR_RANGE, MAX_YEAR_RANGE
    log.debug('Matched year regex group ' + str(i))
    return int(m.group(1)), int(m.group(3))

    return MIN_YEAR_RANGE, MAX_YEAR_RANGE

    def clean_list(self, genres_list):
        """
        Return a new list holding the lowercase string form of every element.

        :param genres_list: List of genre names, in any letter case
        :return: New list of lowercase strings. An empty list is returned when
            ``genres_list`` is not a list, so the result can always be passed to ``set()``
        """
        if not isinstance(genres_list, list):
            # Missing genres may arrive as '' or None; treat them as "no genres"
            return []
        return [str(item).lower() for item in genres_list]

    def genres_match(self, user_genres, movie_genres):
        """
        Match a genres object from the config against the genres of a movie.

        The comparison is case insensitive. Supported match types:
        'any'   - at least one of the user genres exists in the movie genres.
        'all'   - every user genre exists in the movie genres.
        'exact' - the user genres and the movie genres are the same set.

        :param user_genres: Dict holding a 'genres' list and an optional 'match_type'
            (treated as 'any' when missing)
        :param movie_genres: List of the movie genres
        :return: True if the genres match or no genres object was given, False otherwise
            (including an unknown match type)
        """
        if not user_genres:
            return True

        # `or []` keeps set() safe should clean_list hand back None for a non-list value
        wanted = set(self.clean_list(user_genres.get('genres')) or [])
        found = set(self.clean_list(movie_genres) or [])
        match_type = user_genres.get('match_type') or 'any'

        if match_type == 'any':
            return not wanted.isdisjoint(found)
        if match_type == 'all':
            return wanted.issubset(found)
        if match_type == 'exact':
            return wanted == found
        return False

    @format_checker.checks('year_format', raises=ValueError)
    def is_year_format(instance):
        """
        Format check registered under the name 'year_format'.

        Values that are not strings are accepted untouched; a string has to match
        one of the YEAR_FORMATS patterns.

        :param instance: Value to check
        :return: True when the value is accepted
        :raises ValueError: When a string value matches none of the YEAR_FORMATS patterns
        """
        is_string = isinstance(instance, str_types)
        if is_string and not any(re.match(pattern, instance) for pattern in YEAR_FORMATS):
            raise ValueError('Invalid year format, or years out of range of %d to %d. Please check config.'
                             % (MIN_YEAR_RANGE, MAX_YEAR_RANGE))
        return True


    def on_task_input(self, task, config):
    entries = []
    person = config.get('person')

    if config.get('strict_mode'):
    default_rating = 0
    default_start_year = MIN_YEAR_RANGE
    default_end_year = MAX_YEAR_RANGE
    default_votes = 0
    else:
    default_rating = 10
    default_start_year = MAX_YEAR_RANGE
    default_end_year = MIN_YEAR_RANGE
    default_votes = 1000000

    person = config.get('person')
    include_list = config.get('include_genres')
    exclude_list = config.get('exclude_genres')
    movies = self.get_all_movies_by_person(person)

    if not movies:
    log.warning('Could not get movie list. Check your config:')
    log.error('Could not get movie list, check your configuration.')
    return

    year_range = self.parse_year(config.get('years'))
    log.info('Retrieved %d movies, starting to filter.' % len(movies))

    for movie in movies:
    try:
    ia.update(movie)
    except IMDbDataAccessError as e:
    except Exception as e:
    log.error('An error has occurred, cannot get movie data: %s' % e)
    continue

    log.debug('Testing movie:' + movie.get('long imdb canonical title'))
    log.debug(
    'Testing if movie: ' + movie.get('long imdb canonical title') + ' qualifies for adding to entries.')

    type_test = movie.get('kind') in person.get('content_types')
    rating_test = float(movie.get('rating', default_rating)) >= config.get('rating', 1)
    year_test = (movie.get('year', default_start_year)) >= year_range[0] and\
    (movie.get('year', default_end_year)) <= year_range[1]
    votes_test = int(movie.get('votes', default_votes)) >= config.get('votes', 1)
    log.debug('Movie kind: ' + movie.get('kind') + ' found in config types '
    + ', '.join(person.get('content_types')) + ': ' + str(type_test))

    if type_test and rating_test and year_test and votes_test:
    entry = Entry(title=movie['title'],
    imdb_id='tt' + ia.get_imdbID(movie),
    url=ia.get_imdbURL(movie))
    if config.get('include_genres') or config.get('exclude_genres'):
    for ex_genre in config.get('exclude_genres', []):
    if ' ,'.join(movie.get('genres', [])).lower().find(ex_genre.lower()) == -1:
    for inc_genre in config.get('include_genres'):
    log.debug('Checking if %s is in movie genres: %s' %
    (inc_genre, ' ,'.join(movie.get('genres', ''))))
    if ' ,'.join(movie.get('genres', '')).lower().find(inc_genre.lower()) != -1:
    if entry.isvalid():
    log.debug('Genres test passed')
    if entry not in entries:
    entries.append(entry)
    if entry and task.options.test:
    log.info("Test mode. Entry includes:")
    log.info(" Title: %s" % entry["title"])
    log.info(" URL: %s" % entry["url"])
    log.info(" IMDB ID: %s" % entry["imdb_id"])
    else:
    log.error('Invalid entry created? %s' % entry)
    if config.get('rating'):
    if not movie.get('rating'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have rating value, skipping movie.')
    rating_test = False
    else:
    log.debug('Movie does not have rating listed, skipping test.')
    rating_test = True
    else:
    rating_test = float(movie.get('rating')) >= config.get('rating', 1)
    log.debug('Movie rating: ' + str(movie.get('rating')) +
    ' is higher or equal to: ' + str(config.get('rating', '1')) + ': ' + str(rating_test))
    else:
    log.debug('No rating test required, skipping rating test.')
    rating_test = True

    if config.get('years'):
    if not movie.get('year'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have year value, skipping movie.')
    year_test = False
    else:
    log.debug('Movie does not have year listed, skipping test.')
    year_test = True
    else:
    log.debug('No genres requested in config, passing all genres.')
    if entry.isvalid():
    if entry not in entries:
    entries.append(entry)
    if entry and task.options.test:
    log.info("Test mode. Entry includes:")
    log.info(" Title: %s" % entry["title"])
    log.info(" URL: %s" % entry["url"])
    log.info(" IMDB ID: %s" % entry["imdb_id"])
    year_test = (movie.get('year') >= year_range[0] and (movie.get('year') <= year_range[1]))
    log.info('Movie year: ' + str((movie.get('year'))) + ' is in given range of '
    + str(year_range[0]) + ' and ' + str(year_range[1]) + ': ' + str(year_test))
    else:
    log.debug('No years test required, skipping year test.')
    year_test = True

    if config.get('votes'):
    if not movie.get('votes'):
    if config.get('strict_mode'):
    log.debug('Strict mode: Movie does not have votes value, skipping movie.')
    votes_test = False
    else:
    log.error('Invalid entry created? %s' % entry)
    log.debug('Movie does not have votes listed, skipping test.')
    votes_test = True
    else:
    votes_test = int(movie.get('votes')) >= config.get('votes', 1)
    log.debug('Movie votes: ' + str(movie.get('votes')) +
    ' are higher or equal to: ' + str(config.get('votes', '1')) + ': ' + str(votes_test))
    else:
    log.debug('No votes test required, skipping votes test.')
    votes_test = True

    if exclude_list:
    exclude_test = not self.genres_match(exclude_list, movie.get('genres', ''))
    log.debug('Exclude genres: ' + ', '.join(
    exclude_list.get('genres', [])) + ' with match type ' + exclude_list.get(
    'match_type') + ' are not found in movie genres: ' + ', '.join(movie.get('genres', [])) +
    ': ' + str(exclude_test))
    else:
    log.debug('No genres exclude test required, skipping exclude list test.')
    exclude_test = True

    if include_list:
    include_test = self.genres_match(include_list, movie.get('genres', ''))
    log.debug('Include genres: ' + ', '.join(
    include_list.get('genres', [])) + ' with match type ' + include_list.get(
    'match_type') + ' are found in movie genres: ' + ', '.join(movie.get('genres', [])) +
    ': ' + str(include_test))
    else:
    log.debug('No genres include test required, skipping include list test.')
    include_test = True

    if type_test and rating_test and year_test and votes_test and exclude_test and include_test:
    entry = Entry(title=movie['title'],
    imdb_id='tt' + ia.get_imdbID(movie),
    url='')
    if entry.isvalid():
    if entry not in entries:
    entries.append(entry)
    if entry and task.options.test:
    log.info("Test mode. Entry includes:")
    log.info(" Title: %s" % entry["title"])
    log.info(" IMDB ID: %s" % entry["imdb_id"])
    else:
    log.error('Invalid entry created? %s' % entry)

    if len(entries) <= config.get('max_entries'):
    return entries
    else:
    log.warning(
    log.error(
    'Number of entries (%s) exceeds maximum allowed value %s. '
    'Edit your filters or raise the maximum value by entering a higher "max_entries"' % (
    len(entries), config.get('max_entries')))
  7. liiight revised this gist Aug 20, 2015. 1 changed file with 20 additions and 17 deletions.
    37 changes: 20 additions & 17 deletions smart_imdb.py
    Original file line number Diff line number Diff line change
    @@ -43,7 +43,8 @@ class SmartIMDB(object):
    'type': 'object',
    'properties': {
    'person': entity_schema,
    'genres': {"type": "array", "items": {"type": "string"}},
    'include_genres': {"type": "array", "items": {"type": "string"}},
    'exclude_genres': {"type": "array", "items": {"type": "string"}},
    'rating': {'type': 'number'},
    'votes': {'type': 'number'},
    'years': {'type': 'string'},
    @@ -162,22 +163,24 @@ def on_task_input(self, task, config):
    entry = Entry(title=movie['title'],
    imdb_id='tt' + ia.get_imdbID(movie),
    url=ia.get_imdbURL(movie))
    if config.get('genres'):
    for genre in config.get('genres'):
    log.debug('Checking if %s is in movie genres: %s' %
    (genre, ' ,'.join(movie.get('genres', ''))))
    if ' ,'.join(movie.get('genres', '')).lower().find(genre.lower()) != -1:
    if entry.isvalid():
    log.debug('Genres test passed')
    if entry not in entries:
    entries.append(entry)
    if entry and task.options.test:
    log.info("Test mode. Entry includes:")
    log.info(" Title: %s" % entry["title"])
    log.info(" URL: %s" % entry["url"])
    log.info(" IMDB ID: %s" % entry["imdb_id"])
    else:
    log.error('Invalid entry created? %s' % entry)
    if config.get('include_genres') or config.get('exclude_genres'):
    for ex_genre in config.get('exclude_genres', []):
    if ' ,'.join(movie.get('genres', [])).lower().find(ex_genre.lower()) == -1:
    for inc_genre in config.get('include_genres'):
    log.debug('Checking if %s is in movie genres: %s' %
    (inc_genre, ' ,'.join(movie.get('genres', ''))))
    if ' ,'.join(movie.get('genres', '')).lower().find(inc_genre.lower()) != -1:
    if entry.isvalid():
    log.debug('Genres test passed')
    if entry not in entries:
    entries.append(entry)
    if entry and task.options.test:
    log.info("Test mode. Entry includes:")
    log.info(" Title: %s" % entry["title"])
    log.info(" URL: %s" % entry["url"])
    log.info(" IMDB ID: %s" % entry["imdb_id"])
    else:
    log.error('Invalid entry created? %s' % entry)
    else:
    log.debug('No genres requested in config, passing all genres.')
    if entry.isvalid():
  8. liiight created this gist Aug 20, 2015.
    208 changes: 208 additions & 0 deletions smart_imdb.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,208 @@
    from __future__ import unicode_literals, division, absolute_import

    import logging
    import re
    from imdb._exceptions import IMDbDataAccessError
    from flexget import plugin
    from flexget.event import event
    from flexget.entry import Entry


    from imdb import IMDb
    from flexget.config_schema import format_checker

    log = logging.getLogger('smart_imdb')

    ia = IMDb()

    # Job names accepted in the 'jobtypes' list of the person (entity) schema
    JOB_TYPES = ['actor', 'director', 'producer', 'writer', 'self',
                 'editor', 'miscellaneous', 'editorial department', 'cinematographer',
                 'visual effects', 'thanks', 'music department']
    # Content kinds accepted in the 'content_types' list of the person (entity) schema
    CONTENT_TYPES = ['movie', 'tv series', 'tv mini series', 'video game', 'video movie', 'tv movie', 'episode']
    # Accepted 'years' notations. parse_year() depends on this exact order:
    # 'YYYY' (single year), '-YYYY' (up to), 'YYYY-' (from), 'YYYY-YYYY' (range).
    YEAR_FORMATS = [r'(\d{4})$', r'\-(\d{4})$', r'(\d{4})\-$', r'(\d{4})\-(\d{4})$']
    # Bounds used for the open side of a year range
    MIN_YEAR_RANGE = 1700
    MAX_YEAR_RANGE = 3000


    class SmartIMDB(object):
        """
        Creates entries from the IMDB titles a given person worked on.

        The person is looked up by IMDB ID or by name. The titles found can be
        filtered by content type, rating, votes, release years and genres.

        Example::

          smart_imdb:
            person:
              name: Some Name
              jobtypes:
                - director
              content_types:
                - movie
            genres:
              - drama
            rating: 7.5
            votes: 1000
            years: 1990-2000
        """

        # Content types used when the person config does not list any
        _DEFAULT_CONTENT_TYPES = ['movie']
        # Entry limit used when the config carries no 'max_entries' value
        _DEFAULT_MAX_ENTRIES = 200

        # NOTE(review): the 'default' keys below sit inside 'items', so they presumably do not
        # supply a default for the list itself -- the code handles a missing list explicitly.
        entity_schema = {
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'id': {'type': 'string'},
                'jobtypes': {'type': 'array', 'items': {'type': 'string', 'enum': JOB_TYPES, 'default': 'actor'}},
                'content_types': {'type': 'array',
                                  'items': {'type': 'string', 'enum': CONTENT_TYPES, 'default': 'movie'}}
            },
            'oneOf': [{'required': ['name']}, {'required': ['id']}],
            'error_oneOf': 'Either a name of ID are required.',
            'additionalProperties': False
        }

        schema = {
            'type': 'object',
            'properties': {
                'person': entity_schema,
                'genres': {"type": "array", "items": {"type": "string"}},
                'rating': {'type': 'number'},
                'votes': {'type': 'number'},
                'years': {'type': 'string'},
                'max_entries': {'type': 'number', 'default': 200},
                'strict_mode': {'type': 'boolean', 'default': False}
            },
            'required': ['person'],
            'additionalProperties': False

        }

        @staticmethod
        def is_imdb_id(string):
            """
            Checks if a given string matches IMDB id format (two letters followed by seven digits)

            :param string: Supposed IMDB ID
            :return: Match object whose group 1 holds the seven digits, or None if it does not match
            """
            return re.search(r'[a-zA-Z]{2}(\d{7})', string)

        def _resolve_person(self, person):
            """
            Look up the IMDB person described by the config.

            :param person: Person dict from config, holding either 'id' or 'name'
            :return: IMDB person object, or None if the person could not be resolved
            """
            person_id = person.get('id')
            if person_id:
                match = self.is_imdb_id(person_id)
                if not match:
                    log.warning('Could not resolve person from IMDB ID %s' % person_id)
                    return None
                log.info('Detected IMDB ID %s, resolving person.' % person_id)
                imdb_person = ia.get_person(match.group(1))
                if not imdb_person:
                    log.warning('Could not resolve person from IMDB ID %s' % person_id)
                    return None
                return imdb_person

            log.debug('Trying to search for person: %s' % person.get('name'))
            person_list = ia.search_person(person.get('name'))
            if not person_list:
                return None
            # The first search result is taken as the requested person
            return ia.get_person(person_list[0].personID)

        def get_all_movies_by_person(self, person):
            """
            Gets a person object from config and returns the titles that person worked on

            :param person: Person dict from config ('id' or 'name', optional 'jobtypes' and 'content_types')
            :return: List of movie objects, or None if the person could not be resolved
            """
            imdb_person = self._resolve_person(person)
            if not imdb_person:
                return

            job_types = person.get('jobtypes')
            if not job_types:  # Special case, since 'actor' and 'actress' are separate keys in object
                log.debug('Getting movies for actor/actress: %s' % imdb_person['name'])
                return imdb_person.get('actor', imdb_person.get('actress'))

            log.debug('Getting movies for %s with the following jobtypes: %s' %
                      (imdb_person['name'], ' ,'.join(job_types)))
            content_types = person.get('content_types') or self._DEFAULT_CONTENT_TYPES
            movies = []
            for jobtype in job_types:
                for content_type in content_types:
                    # Try the '<job> <content type>' key first, fall back to the plain job key
                    job_and_content = jobtype + ' ' + content_type
                    movies_by_jobtype = imdb_person.get(job_and_content, imdb_person.get(jobtype))
                    for movie in movies_by_jobtype or []:
                        # The fallback key repeats for every content type, so skip titles already collected
                        if movie not in movies:
                            movies.append(movie)
            return movies

        def parse_year(self, year):
            """
            Receives 'year_range_format' string and parses it to return start and end dates

            :param year: 'YYYY', '-YYYY', 'YYYY-' or 'YYYY-YYYY' string; may be empty or None
            :return: Tuple with start and end year. Uses max and min year values for an open side,
                and for a missing or unrecognized value
            """
            if not year:
                return MIN_YEAR_RANGE, MAX_YEAR_RANGE

            # The index of the matching pattern tells which notation was used
            for index, pattern in enumerate(YEAR_FORMATS):
                match = re.match(pattern, year)
                if not match:
                    continue
                if index == 0:
                    return int(match.group(1)), int(match.group(1))
                if index == 1:
                    return MIN_YEAR_RANGE, int(match.group(1))
                if index == 2:
                    return int(match.group(1)), MAX_YEAR_RANGE
                return int(match.group(1)), int(match.group(2))

            return MIN_YEAR_RANGE, MAX_YEAR_RANGE

        @staticmethod
        def _passes_minimum(value, minimum, strict, cast):
            """
            Tell if a movie value reaches the configured minimum.

            :param value: Movie value (rating or votes), may be missing
            :param minimum: Configured minimum; a falsy value means the filter is not configured
            :param strict: When true, a movie without the value fails a configured filter
            :param cast: Callable converting the movie value before comparing (float or int)
            :return: True if the movie passes the filter
            """
            if not minimum:
                # A filter the user did not configure must not exclude anything
                return True
            if not value:
                return not strict
            return cast(value) >= minimum

        @staticmethod
        def _passes_years(year, years_filter, year_range, strict):
            """
            Tell if a movie year lies inside the configured year range.

            :param year: Movie year, may be missing
            :param years_filter: Raw 'years' config value; falsy means the filter is not configured
            :param year_range: (start, end) tuple as returned by parse_year
            :param strict: When true, a movie without a year fails a configured filter
            :return: True if the movie passes the filter
            """
            if not years_filter:
                return True
            if not year:
                return not strict
            return year_range[0] <= year <= year_range[1]

        @staticmethod
        def _passes_genres(movie_genres, wanted_genres):
            """
            Tell if any of the wanted genres appears in the movie genres.

            The check is case insensitive and matches substrings of the joined movie genres.

            :param movie_genres: List of the movie genres, may be missing
            :param wanted_genres: 'genres' list from config; falsy means every movie passes
            :return: True if the movie passes the filter
            """
            if not wanted_genres:
                log.debug('No genres requested in config, passing all genres.')
                return True

            joined = ' ,'.join(movie_genres or [])
            for genre in wanted_genres:
                log.debug('Checking if %s is in movie genres: %s' % (genre, joined))
                if genre.lower() in joined.lower():
                    log.debug('Genres test passed')
                    return True
            return False

        def on_task_input(self, task, config):
            """
            Build the task entries from the titles of the configured person.

            :param task: Task being run; only ``task.options.test`` is read here
            :param config: Plugin config matching ``schema``
            :return: List of entries, or None when no titles were found or when more than
                'max_entries' entries passed the filters
            """
            person = config.get('person')
            movies = self.get_all_movies_by_person(person)
            if not movies:
                log.warning('Could not get movie list. Check your config:')
                return

            content_types = person.get('content_types') or self._DEFAULT_CONTENT_TYPES
            year_range = self.parse_year(config.get('years'))
            strict = config.get('strict_mode')
            entries = []

            for movie in movies:
                try:
                    # Fetch the full movie data; search results only carry basic fields
                    ia.update(movie)
                except IMDbDataAccessError as e:
                    log.error('An error has occurred, cannot get movie data: %s' % e)
                    continue

                log.debug('Testing movie:%s' % movie.get('long imdb canonical title'))

                if movie.get('kind') not in content_types:
                    continue
                if not self._passes_minimum(movie.get('rating'), config.get('rating'), strict, float):
                    continue
                if not self._passes_minimum(movie.get('votes'), config.get('votes'), strict, int):
                    continue
                if not self._passes_years(movie.get('year'), config.get('years'), year_range, strict):
                    continue
                if not self._passes_genres(movie.get('genres'), config.get('genres')):
                    continue

                entry = Entry(title=movie['title'],
                              imdb_id='tt' + ia.get_imdbID(movie),
                              url=ia.get_imdbURL(movie))
                if not entry.isvalid():
                    log.error('Invalid entry created? %s' % entry)
                    continue
                if entry in entries:
                    continue

                entries.append(entry)
                if task.options.test:
                    log.info("Test mode. Entry includes:")
                    log.info(" Title: %s" % entry["title"])
                    log.info(" URL: %s" % entry["url"])
                    log.info(" IMDB ID: %s" % entry["imdb_id"])

            max_entries = config.get('max_entries')
            if max_entries is None:
                max_entries = self._DEFAULT_MAX_ENTRIES
            if len(entries) <= max_entries:
                return entries

            log.warning(
                'Number of entries (%s) exceeds maximum allowed value %s. '
                'Edit your filters or raise the maximum value by entering a higher "max_entries"' % (
                    len(entries), max_entries))
            return


    @event('plugin.register')
    def register_plugin():
        """Register SmartIMDB under the name 'smart_imdb' (API version 2) on the 'plugin.register' event."""
        plugin.register(SmartIMDB, 'smart_imdb', api_ver=2)