Skip to content

Instantly share code, notes, and snippets.

@liiight
Last active November 3, 2019 21:23
Show Gist options
  • Select an option

  • Save liiight/ccff4ddb199b31163c7a to your computer and use it in GitHub Desktop.

Select an option

Save liiight/ccff4ddb199b31163c7a to your computer and use it in GitHub Desktop.
from __future__ import unicode_literals, division, absolute_import
import logging
import re
from jsonschema.compat import str_types
from flexget import plugin
from flexget.event import event
from flexget.entry import Entry
from imdb import IMDb
from flexget.config_schema import format_checker
# Module-level logger; every message from this plugin is emitted under the 'smart_imdb' name.
log = logging.getLogger('smart_imdb')
# Single IMDb access object, created once at import time and shared by all lookups in this module.
ia = IMDb()
# Job (credit) types a user may ask for. The first element is the default used by the plugin.
JOB_TYPES = [
    'actor',
    'director',
    'producer',
    'writer',
    'self',
    'editor',
    'miscellaneous',
    'editorial department',
    'cinematographer',
    'visual effects',
    'thanks',
    'music department',
]

# Kinds of titles a user may ask for. The first element is the default used by the plugin.
CONTENT_TYPES = [
    'movie',
    'tv series',
    'tv mini series',
    'video game',
    'video movie',
    'tv movie',
    'episode',
]

# Accepted spellings of the 'years' option. The position in this list is significant:
# the year parser dispatches on the index of the pattern that matched.
YEAR_FORMATS = [
    r'^((19|20)\d{2})$',                    # 0: a single year, "YYYY"
    r'^\-((19|20)\d{2})$',                  # 1: open start, "-YYYY"
    r'^((19|20)\d{2})\-$',                  # 2: open end, "YYYY-"
    r'^((19|20)\d{2})\-((19|20)\d{2}$)',    # 3: closed range, "YYYY-YYYY"
]

# Regular expressions that recognise an IMDb id and capture its numeric part, keyed by entity type.
ENTITIES_FORMATS = {
    'Person': r'nm(\d{7})',
    'Company': r'co(\d{7})',
}

# Bounds substituted for the missing side of an open-ended (or absent) year range.
MIN_YEAR_RANGE = 1900
MAX_YEAR_RANGE = 2099
class SmartIMDB(object):
    """
    FlexGet input plugin that creates entries from the titles credited to an IMDb person or company.

    The configuration is either a bare string holding an IMDb id, or a mapping with a required
    ``id`` plus optional filters (job types, content types, genres, rating, votes, years,
    ``max_entries`` and ``strict_mode``); see ``schema`` below for the exact shape.
    """

    # Shape shared by the 'include_genres' and 'exclude_genres' options.
    genres_schema = {
        'type': 'object',
        'properties': {
            'genres': {"type": "array", "items": {"type": "string"}},
            'match_type': {'type': 'string', 'enum': ['any', 'all', 'exact'], 'default': 'any'}
        },
        'required': ['genres'],
        'additionalProperties': False
    }

    job_types = {'type': 'string', 'enum': JOB_TYPES}
    content_types = {'type': 'string', 'enum': CONTENT_TYPES}

    # Either a plain id string, or a mapping with 'id' and optional filters.
    schema = {
        'oneOf': [
            {'type': 'string'},
            {
                'type': 'object',
                'properties': {
                    'id': {'type': 'string'},
                    'job_types': {'oneOf': [{'type': 'array', 'items': job_types}, job_types]},
                    'content_types': {'oneOf': [{'type': 'array', 'items': content_types}, content_types]},
                    'include_genres': genres_schema,
                    'exclude_genres': genres_schema,
                    'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
                    'votes': {'type': 'number'},
                    'years': {'type': 'string', 'format': 'year_format'},
                    'max_entries': {'type': 'number'},
                    'strict_mode': {'type': 'boolean'}
                },
                'required': ['id'],
                'additionalProperties': False
            }
        ]
    }

    def entity_type_and_object(self, imdb_id):
        """
        Return a tuple of entity type and entity object

        :param imdb_id: string which contains IMDB id
        :return: entity type, entity object (person, company, etc.). ``(None, None)`` when the id
            matches none of the known formats, so that callers can always unpack the result.
        """
        if not imdb_id:
            return None, None
        for imdb_entity_type, imdb_entity_format in ENTITIES_FORMATS.items():
            m = re.search(imdb_entity_format, imdb_id)
            if not m:
                continue
            # Fetch the entity once and reuse it for both the log line and the return value.
            if imdb_entity_type == 'Person':
                person = ia.get_person(m.group(1))
                log.info('Starting to retrieve items for person: %s', person)
                return imdb_entity_type, person
            elif imdb_entity_type == 'Company':
                company = ia.get_company(m.group(1))
                log.info('Starting to retrieve items for company: %s', company)
                return imdb_entity_type, company
        return None, None

    def items_by_entity(self, entity_type, entity_object, content_types, job_types):
        """
        Gets entity object and return movie list

        :param entity_type: Person, company, etc.
        :param entity_object: The object itself
        :param content_types: as defined in config
        :param job_types: As defined in config. The list is not modified.
        :return: For a company, whatever the object holds under 'production companies'. For a
            person, a de-duplicated list of the items found for each job type. An empty list for
            any other entity type or when there is no entity object.
        """
        movies = []
        if entity_object is None:
            return movies
        if entity_type == 'Company':
            return entity_object.get('production companies')
        if entity_type != 'Person':
            return movies

        # Work on a copy: appending to the caller's list would leak 'actress' into the task config.
        wanted_jobs = list(job_types)
        if 'actor' in wanted_jobs and 'actress' not in wanted_jobs:
            wanted_jobs.append('actress')

        for job_type in wanted_jobs:
            for content_type in content_types:
                job_and_content = job_type + ' ' + content_type
                log.debug('Searching for movies that correlates to: %s', job_and_content)
                # Prefer the combined "<job> <content>" key and fall back to the plain job key.
                movies_by_job_type = entity_object.get(job_and_content, entity_object.get(job_type))
                if not movies_by_job_type:
                    continue
                for movie in movies_by_job_type:
                    if movie not in movies:
                        log.verbose('Found item: %s ,adding to raw list', movie.get('title'))
                        movies.append(movie)
                    else:
                        log.debug('Movie %s already found in list, skipping.', movie)
        return movies

    def parse_year(self, year):
        """
        Receives 'year_range_format' string and parses it to return start and end dates

        :param year: One of the spellings described by YEAR_FORMATS, or a false value
        :return: Tuple with start and end year. Uses max an min year values if either one is needed
        """
        if not year:
            log.debug('No year filter in config, returning defaults.')
            return MIN_YEAR_RANGE, MAX_YEAR_RANGE
        for i, year_format in enumerate(YEAR_FORMATS):
            m = re.match(year_format, year)
            if not m:
                continue
            log.debug('Matched year regex group ' + str(i))
            if i == 0:
                # Single year: start and end are the same.
                return int(m.group(1)), int(m.group(1))
            elif i == 1:
                # "-YYYY": everything up to the given year.
                return MIN_YEAR_RANGE, int(m.group(1))
            elif i == 2:
                # "YYYY-": everything from the given year on.
                return int(m.group(1)), MAX_YEAR_RANGE
            elif i == 3:
                # "YYYY-YYYY": group 3 is the second full year (group 2 is the century of the first).
                return int(m.group(1)), int(m.group(3))
        return MIN_YEAR_RANGE, MAX_YEAR_RANGE

    def clean_list(self, genres_list):
        """
        Gets a list and return a new list with lowercase elements

        :param genres_list: List of genres to clean
        :return: New list of lower-cased strings; an empty list when the input is not a list
        """
        if not isinstance(genres_list, list):
            # An empty list (rather than None) keeps callers that join or build sets from crashing.
            return []
        return [str(item).lower() for item in genres_list]

    def genres_match(self, user_genres, movie_genres):
        """
        Takes a genres object from config schema and tries to match it with movie genres according to match type.
        If match type is 'any', will return true if any of the user specified genres exists in the movie genres list.
        if match type is 'all' will return true if all of the user specified genres exists in the movie genres list.
        if match type is 'exact' will return true if exactly all of the user specified genres exists in the movie genres
        list.

        :param user_genres: Genres from config
        :param movie_genres: Movie genres
        :return: True when no user genres were given or when the match succeeds, False when it fails,
            None for an unrecognised match type
        """
        if not user_genres:
            return True
        user_genres_list = self.clean_list(user_genres.get('genres'))
        movie_genres_list = self.clean_list(movie_genres)
        # Fall back to the schema's documented default if the key was not filled in.
        match_type = user_genres.get('match_type', 'any')
        log.debug('Matching user genres: %s with movie genres: %s and match type: %s',
                  ', '.join(user_genres_list), ', '.join(movie_genres_list), match_type)
        wanted = set(user_genres_list)
        found = set(movie_genres_list)
        if match_type == 'any':
            return bool(wanted & found)
        elif match_type == 'all':
            return not bool(wanted - found)
        elif match_type == 'exact':
            return not bool(wanted ^ found)

    # staticmethod is applied last, so the format checker registers the plain function while
    # attribute access on the class or an instance no longer binds 'instance' to the plugin object.
    @staticmethod
    @format_checker.checks('year_format', raises=ValueError)
    def is_year_format(instance):
        """
        Format check for the 'year_format' schema format.

        :param instance: Value to check; values that are not strings are accepted as-is
        :return: True when the value is not a string or matches one of YEAR_FORMATS
        :raises ValueError: when a string matches none of YEAR_FORMATS
        """
        if not isinstance(instance, str_types):
            return True
        if any(re.match(regex, instance) for regex in YEAR_FORMATS):
            return True
        raise ValueError('Invalid year format, or years out of range of %d to %d. Please check config.'
                         % (MIN_YEAR_RANGE, MAX_YEAR_RANGE))

    def prepare_config(self, config):
        """
        Converts config to dict form and sets defaults if needed

        :param config: Either the bare id or the mapping form of the plugin configuration
        :return: A new dict with defaults applied and 'content_types' / 'job_types' as lists
        """
        if not isinstance(config, dict):
            config = {'id': config}
        else:
            # Shallow copy so that applying defaults does not alter the caller's configuration.
            config = dict(config)
        config.setdefault('content_types', [CONTENT_TYPES[0]])
        config.setdefault('job_types', [JOB_TYPES[0]])
        config.setdefault('max_entries', 200)
        config.setdefault('strict_mode', False)
        if isinstance(config['content_types'], str_types):
            log.debug('Converted content type from string to list.')
            config['content_types'] = [config['content_types']]
        if isinstance(config['job_types'], str_types):
            log.debug('Converted job type from string to list.')
            config['job_types'] = [config['job_types']]
        return config

    def _missing_value_verdict(self, value_name, strict_mode):
        """Outcome of a test whose value is missing on the item: fail in strict mode, pass otherwise."""
        if strict_mode:
            log.debug('Strict mode: Movie does not have %s value, skipping movie.', value_name)
            return False
        log.debug('Movie does not have %s listed, skipping test.', value_name)
        return True

    def _item_qualifies(self, item, config, year_range, include_list, exclude_list):
        """Run every configured filter against one item, logging each result; True if all pass."""
        strict_mode = config.get('strict_mode')
        # Arguments are passed to the logger instead of concatenated so that a missing (None)
        # value on the item is logged as 'None' rather than raising TypeError.
        log.debug('Testing if movie: %s qualifies for adding to entries.', item.get('long imdb canonical title'))

        content_types = config.get('content_types')
        type_test = item.get('kind') in content_types
        log.debug('Movie kind: %s found in config types %s: %s',
                  item.get('kind'), ', '.join(content_types), type_test)

        wanted_rating = config.get('rating')
        if wanted_rating:
            rating = item.get('rating')
            if not rating:
                rating_test = self._missing_value_verdict('rating', strict_mode)
            else:
                rating_test = float(rating) >= wanted_rating
                log.debug('Movie rating: %s is higher or equal to: %s: %s', rating, wanted_rating, rating_test)
        else:
            log.debug('No rating test required, skipping rating test.')
            rating_test = True

        if config.get('years'):
            year = item.get('year')
            if not year:
                year_test = self._missing_value_verdict('year', strict_mode)
            else:
                year_test = year_range[0] <= year <= year_range[1]
                log.debug('Movie year: %s is in given range of %s and %s: %s',
                          year, year_range[0], year_range[1], year_test)
        else:
            log.debug('No years test required, skipping year test.')
            year_test = True

        wanted_votes = config.get('votes')
        if wanted_votes:
            votes = item.get('votes')
            if not votes:
                votes_test = self._missing_value_verdict('votes', strict_mode)
            else:
                votes_test = int(votes) >= wanted_votes
                log.debug('Movie votes: %s are higher or equal to: %s: %s', votes, wanted_votes, votes_test)
        else:
            log.debug('No votes test required, skipping votes test.')
            votes_test = True

        item_genres = item.get('genres') or []
        if exclude_list:
            # The item passes the exclude test when the exclude genres do NOT match.
            exclude_test = not self.genres_match(exclude_list, item_genres)
            log.debug('Exclude genres: %s with match type %s are not found in item genres: %s: %s',
                      ', '.join(exclude_list.get('genres', [])), exclude_list.get('match_type', 'any'),
                      ', '.join(item_genres), exclude_test)
        else:
            log.debug('No genres exclude test required, skipping exclude list test.')
            exclude_test = True

        if include_list:
            include_test = self.genres_match(include_list, item_genres)
            log.debug('Include genres: %s with match type %s are found in item genres: %s: %s',
                      ', '.join(include_list.get('genres', [])), include_list.get('match_type', 'any'),
                      ', '.join(item_genres), include_test)
        else:
            log.debug('No genres include test required, skipping include list test.')
            include_test = True

        return bool(type_test and rating_test and year_test and votes_test and exclude_test and include_test)

    def on_task_input(self, task, config):
        """
        Build the entry list for a task from the configured IMDb person or company.

        :param task: The running task; only ``task.options.test`` is read here
        :param config: Plugin configuration, in string or mapping form
        :return: List of entries, or None when no items could be retrieved or when the number of
            entries exceeds 'max_entries'
        """
        config = self.prepare_config(config)
        include_list = config.get('include_genres')
        exclude_list = config.get('exclude_genres')

        entity_type, entity_object = self.entity_type_and_object(config.get('id'))
        items = self.items_by_entity(entity_type, entity_object,
                                     config.get('content_types', []), config.get('job_types', []))
        if not items:
            log.error('Could not get IMDB item list, check your configuration.')
            return

        year_range = self.parse_year(config.get('years'))
        log.info('Retrieved %d items, starting to filter list.' % len(items))

        entries = []
        for item in items:
            # Best effort per item: a failed update is logged and the item skipped, so one bad
            # title does not abort the whole run.
            try:
                ia.update(item)
            except Exception as e:
                log.error('An error has occurred, cannot get movie data: %s' % e)
                continue

            if not self._item_qualifies(item, config, year_range, include_list, exclude_list):
                continue

            # NOTE(review): entries are created with an empty url, as before - confirm that this
            # is what downstream plugins expect.
            entry = Entry(title=item['title'],
                          imdb_id='tt' + ia.get_imdbID(item),
                          url='')
            if not entry.isvalid():
                log.error('Invalid entry created? %s' % entry)
                continue
            if entry not in entries:
                entries.append(entry)
                if entry and task.options.test:
                    log.info("Test mode. Entry includes:")
                    log.info("    Title: %s" % entry["title"])
                    log.info("    IMDB ID: %s" % entry["imdb_id"])

        if len(entries) <= config.get('max_entries'):
            return entries
        log.error(
            'Number of entries (%s) exceeds maximum allowed value %s. '
            'Edit your filters or raise the maximum value by entering a higher "max_entries"' % (
                len(entries), config.get('max_entries')))
        return
@event('plugin.register')
def register_plugin():
    """Register SmartIMDB under the plugin name 'smart_imdb', declaring plugin API version 2."""
    plugin.register(SmartIMDB, 'smart_imdb', api_ver=2)
@bubufofo
Copy link
Copy Markdown

bubufofo commented Nov 3, 2019

does this work for games

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment