liiight · November 3, 2019 21:23 · bubufofo · Nov 3, 2019
diff --git a/smart_imdb.py b/smart_imdb.py
 from __future__ import unicode_literals, division, absolute_import

 import logging
 import re
 from jsonschema.compat import str_types
 from flexget import plugin
 from flexget.event import event
 from flexget.entry import Entry

 from imdb import IMDb
 from flexget.config_schema import format_checker

 # Module-level logger used by every method of this plugin.
 log = logging.getLogger('smart_imdb')

 # IMDb access object, created once at import time and shared by all SmartIMDB methods.
 ia = IMDb()

 # Values accepted by the ``job_types`` option of the person schema.
 JOB_TYPES = [
     'actor',
     'director',
     'producer',
     'writer',
     'self',
     'editor',
     'miscellaneous',
     'editorial department',
     'cinematographer',
     'visual effects',
     'thanks',
     'music department',
 ]

 # Values accepted by the ``content_types`` option of the person schema.
 CONTENT_TYPES = [
     'movie',
     'tv series',
     'tv mini series',
     'video game',
     'video movie',
     'tv movie',
     'episode',
 ]

 # TODO Improve year regex detection
 # Accepted layouts of the ``years`` option. parse_year() depends on this exact order:
 # 0: 'YYYY'   1: '-YYYY'   2: 'YYYY-'   3: 'YYYY-YYYY'
 YEAR_FORMATS = [
     r'^((19|20)\d{2})$',
     r'^\-((19|20)\d{2})$',
     r'^((19|20)\d{2})\-$',
     r'^((19|20)\d{2})\-((19|20)\d{2}$)',
 ]

 # Bounds used when the ``years`` option leaves one side (or both) open.
 MIN_YEAR_RANGE = 1900
 MAX_YEAR_RANGE = 2099


 class SmartIMDB(object):
    # Shape shared by the ``include_genres`` and ``exclude_genres`` options.
    genres_schema = {
        'type': 'object',
        'properties': {
            'genres': {'type': 'array', 'items': {'type': 'string'}},
            'match_type': {'type': 'string', 'enum': ['any', 'all', 'exact'], 'default': 'any'},
        },
        'required': ['genres'],
        'additionalProperties': False,
    }

    # The person to look up. ``oneOf`` accepts a name or an id, and rejects a config that has both.
    person_schema = {
        'type': 'object',
        'properties': {
            'name': {'type': 'string'},
            'id': {'type': 'string'},
            'job_types': {'type': 'array', 'items': {'type': 'string', 'enum': JOB_TYPES}, 'default': ['actor']},
            'content_types': {'type': 'array', 'items': {'type': 'string', 'enum': CONTENT_TYPES},
                              'default': ['movie']},
        },
        'oneOf': [{'required': ['name']}, {'required': ['id']}],
        # Message fixed: the old text read "name of ID" and did not mention that both together fail.
        'error_oneOf': 'Either a name or an ID is required, but not both.',
        'additionalProperties': False,
    }

    # Full plugin config; only ``person`` is mandatory.
    schema = {
        'type': 'object',
        'properties': {
            'person': person_schema,
            'include_genres': genres_schema,
            'exclude_genres': genres_schema,
            'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
            'votes': {'type': 'number'},
            'years': {'type': 'string', 'format': 'year_format'},
            'max_entries': {'type': 'number', 'default': 200},
            'strict_mode': {'type': 'boolean', 'default': False},
        },
        'required': ['person'],
        'additionalProperties': False,
    }

    @staticmethod
    def is_imdb_id(string):
        """
        Checks if a given string matches IMDB id format
        :param string: Supposed IMDB ID
        :return: True if it matches
        """
        return re.search('[a-zA-Z]{2}(\d{7})', string)

    def get_all_movies_by_person(self, person):
        """
        Resolve the configured person on IMDB and collect their titles.

        The person is fetched by ``id`` when one is configured; otherwise the first search
        result for ``name`` is used. Every configured job type is combined with every
        configured content type to look up the person's titles, falling back to the plain
        job type when the combined key does not exist.

        :param person: The ``person`` section of the plugin config. It is not modified.
        :return: List of titles without duplicates, or None when the person could not be resolved
        """
        # TODO split this to (at least) two methods
        person_id = person.get('id')
        if person_id:
            id_match = self.is_imdb_id(person_id)
            if not id_match:
                log.error('Could not resolve person from IMDB ID %s', person_id)
                return None
            log.debug('Detected IMDB ID %s, resolving person.', person_id)
            imdb_person = ia.get_person(id_match.group(1))
        else:
            log.debug('Trying to search for person: %s', person.get('name'))
            person_list = ia.search_person(person.get('name'))
            if not person_list:
                return None
            imdb_person = ia.get_person(person_list[0].personID)

        # Work on copies: appending to the configured list changed the caller's config and
        # added one more 'actress' on every call. Missing options fall back to the schema defaults.
        job_types = person.get('job_types')
        job_types = ['actor'] if job_types is None else list(job_types)
        content_types = person.get('content_types')
        content_types = ['movie'] if content_types is None else list(content_types)

        # Special case: Actress and actor are different roles.
        if 'actor' in job_types and 'actress' not in job_types:
            job_types.append('actress')

        log.info('Getting movies for %s with the following job types: %s',
                 imdb_person['name'], ', '.join(job_types))

        movies = []
        for job_type in job_types:
            for content_type in content_types:
                job_and_content = job_type + ' ' + content_type
                log.debug('Searching for movies that correlates to: %s', job_and_content)
                movies_by_job_type = imdb_person.get(job_and_content, imdb_person.get(job_type))
                for movie in movies_by_job_type or ():
                    if movie in movies:
                        log.debug('Movie %s already found in list, skipping.', movie)
                        continue
                    # Logging argument instead of concatenation: a title of None no longer raises.
                    log.debug('Adding movie: %s', movie.get('title'))
                    movies.append(movie)
        return movies

    def parse_year(self, year=None):
        """
        Turn the ``years`` config string into a (start, end) pair of years.

        :param year: String in one of the YEAR_FORMATS layouts, or an empty value
        :return: Tuple of start and end year. MIN_YEAR_RANGE / MAX_YEAR_RANGE stand in for an
            open side, and for both sides when ``year`` is empty or matches no layout
        """
        if not year:
            log.debug('No year filter in config, returning defaults')
            return MIN_YEAR_RANGE, MAX_YEAR_RANGE

        # One range builder per YEAR_FORMATS entry, in the same order.
        range_builders = (
            lambda found: (int(found.group(1)), int(found.group(1))),
            lambda found: (MIN_YEAR_RANGE, int(found.group(1))),
            lambda found: (int(found.group(1)), MAX_YEAR_RANGE),
            lambda found: (int(found.group(1)), int(found.group(3))),
        )
        for position, (layout, build_range) in enumerate(zip(YEAR_FORMATS, range_builders)):
            found = re.match(layout, year)
            if found:
                log.debug('Matched year regex group ' + str(position))
                return build_range(found)

        return MIN_YEAR_RANGE, MAX_YEAR_RANGE

    def clean_list(self, genres_list):
        """
        Build a lowercase copy of a list.

        :param genres_list: List of values; each one is converted with ``str`` and lowercased
        :return: New list of lowercase strings, or None when ``genres_list`` is not a list
        """
        if isinstance(genres_list, list):
            return [str(entry).lower() for entry in genres_list]
        return None

    def genres_match(self, user_genres, movie_genres):
        """
        Decide whether a movie's genres satisfy a genres filter from the config.

        The comparison is case-insensitive. ``match_type`` selects the rule:

        * ``any``: at least one configured genre is among the movie genres
        * ``all``: every configured genre is among the movie genres
        * ``exact``: the configured genres and the movie genres are the same set

        :param user_genres: Genres object from config (``genres`` list and ``match_type``)
        :param movie_genres: List of the movie's genres
        :return: True on a match or when ``user_genres`` is empty, otherwise False
        """
        if not user_genres:
            return True

        # clean_list returns None for anything that is not a list. Treat that as "no genres"
        # rather than failing in the join/set calls below.
        user_genres_list = self.clean_list(user_genres.get('genres')) or []
        movie_genres_list = self.clean_list(movie_genres) or []
        # 'any' is the schema default; use it when the option was not filled in.
        match_type = user_genres.get('match_type') or 'any'
        log.debug('Matching user genres: %s with movie genres: %s and match type: %s',
                  ', '.join(user_genres_list), ', '.join(movie_genres_list), match_type)

        wanted = set(user_genres_list)
        found = set(movie_genres_list)
        if match_type == 'any':
            return bool(wanted & found)
        if match_type == 'all':
            return wanted <= found
        if match_type == 'exact':
            return wanted == found
        # Unknown match type: report no match explicitly instead of falling through to None.
        return False

    @format_checker.checks('year_format', raises=ValueError)
    def is_year_format(instance):
        """
        Check a value against the ``year_format`` schema format.

        Written without ``self``; it is registered through the ``format_checker.checks`` decorator.

        :param instance: Value to check
        :return: True for non-string values and for strings matching one of YEAR_FORMATS
        :raises ValueError: When a string matches none of YEAR_FORMATS
        """
        is_text = isinstance(instance, str_types)
        if is_text and not any(re.match(layout, instance) for layout in YEAR_FORMATS):
            raise ValueError('Invalid year format, or years out of range of %d to %d. Please check config.'
                             % (MIN_YEAR_RANGE, MAX_YEAR_RANGE))
        return True

    def on_task_input(self, task, config):
        """
        Create entries for the configured person's titles that pass every configured filter.

        Each title is refreshed from IMDB and then checked against the content types, the
        rating, years and votes filters and the exclude/include genre filters.

        :param task: Current task; ``task.options.test`` turns on extra logging of accepted entries
        :param config: Plugin config matching ``schema``
        :return: List of entries, or None when no titles were found or when more than
            ``max_entries`` entries were produced
        """
        person = config.get('person')
        include_list = config.get('include_genres')
        exclude_list = config.get('exclude_genres')
        strict_mode = config.get('strict_mode')
        # Fall back to the schema defaults so a missing option cannot raise TypeError below.
        content_types = person.get('content_types')
        if content_types is None:
            content_types = ['movie']
        max_entries = config.get('max_entries', 200)

        movies = self.get_all_movies_by_person(person)
        if not movies:
            log.error('Could not get movie list, check your configuration.')
            return None

        year_range = self.parse_year(config.get('years'))
        log.info('Retrieved %d movies, starting to filter.', len(movies))

        entries = []
        for movie in movies:
            try:
                ia.update(movie)
            except Exception as e:
                log.error('An error has occurred, cannot get movie data: %s', e)
                continue

            # Values are handed to the logger as arguments instead of being concatenated, so a
            # missing title or kind is logged as None rather than raising TypeError.
            log.debug('Testing if movie: %s qualifies for adding to entries.',
                      movie.get('long imdb canonical title'))

            kind = movie.get('kind')
            type_test = kind in content_types
            log.debug('Movie kind: %s found in config types %s: %s',
                      kind, ', '.join(content_types), type_test)

            rating_test = self._optional_test(
                'rating', 'rating', config.get('rating'), movie.get('rating'), strict_mode,
                lambda rating: float(rating) >= config.get('rating'),
                'is higher or equal to: %s' % config.get('rating'))
            year_test = self._optional_test(
                'years', 'year', config.get('years'), movie.get('year'), strict_mode,
                lambda year: year_range[0] <= year <= year_range[1],
                'is in given range of %s and %s' % (year_range[0], year_range[1]))
            votes_test = self._optional_test(
                'votes', 'votes', config.get('votes'), movie.get('votes'), strict_mode,
                lambda votes: int(votes) >= config.get('votes'),
                'are higher or equal to: %s' % config.get('votes'))

            movie_genres = movie.get('genres', [])
            if exclude_list:
                exclude_test = not self.genres_match(exclude_list, movie_genres)
                log.debug('Exclude genres: %s with match type %s are not found in movie genres: %s: %s',
                          ', '.join(exclude_list.get('genres', [])), exclude_list.get('match_type'),
                          ', '.join(movie_genres), exclude_test)
            else:
                log.debug('No genres exclude test required, skipping exclude list test.')
                exclude_test = True

            if include_list:
                include_test = self.genres_match(include_list, movie_genres)
                log.debug('Include genres: %s with match type %s are found in movie genres: %s: %s',
                          ', '.join(include_list.get('genres', [])), include_list.get('match_type'),
                          ', '.join(movie_genres), include_test)
            else:
                log.debug('No genres include test required, skipping include list test.')
                include_test = True

            if not (type_test and rating_test and year_test and votes_test
                    and exclude_test and include_test):
                continue

            imdb_id = 'tt' + ia.get_imdbID(movie)
            # The entry url is now the title's IMDB page; it used to be an empty string for
            # every entry. NOTE(review): presumably Entry comparison takes the url into
            # account, which would let same-titled movies coexist - confirm against Entry.
            entry = Entry(title=movie['title'],
                          imdb_id=imdb_id,
                          url='https://www.imdb.com/title/%s/' % imdb_id)
            if not entry.isvalid():
                log.error('Invalid entry created? %s', entry)
                continue
            if entry in entries:
                continue
            entries.append(entry)
            if task.options.test:
                log.info("Test mode. Entry includes:")
                log.info("    Title: %s", entry["title"])
                log.info("    IMDB ID: %s", entry["imdb_id"])

        if len(entries) > max_entries:
            log.error(
                'Number of entries (%s) exceeds maximum allowed value %s. '
                'Edit your filters or raise the maximum value by entering a higher "max_entries"',
                len(entries), max_entries)
            return None
        return entries

    def _optional_test(self, config_key, movie_key, wanted, actual, strict_mode, check, description):
        """
        Run one optional filter (rating, years or votes) against a value read from a movie.

        :param config_key: Name of the config option, used in log messages
        :param movie_key: Name of the movie field, used in log messages
        :param wanted: Configured filter value; a falsy value disables the test
        :param actual: Value read from the movie; may be missing
        :param strict_mode: When true a movie without the value fails, otherwise it passes
        :param check: Callable that receives ``actual`` and returns the test result
        :param description: Text describing the comparison, used in the result log message
        :return: True when the movie passes the test
        """
        if not wanted:
            log.debug('No %s test required, skipping %s test.', config_key, movie_key)
            return True
        if not actual:
            if strict_mode:
                log.debug('Strict mode: Movie does not have %s value, skipping movie.', movie_key)
                return False
            log.debug('Movie does not have %s listed, skipping test.', movie_key)
            return True
        result = check(actual)
        log.debug('Movie %s: %s %s: %s', movie_key, actual, description, result)
        return result


 @event('plugin.register')
 def register_plugin():
    """Register SmartIMDB under the name 'smart_imdb' with api_ver=2; hooked to the 'plugin.register' event."""
    plugin.register(SmartIMDB, 'smart_imdb', api_ver=2)
	from __future__ import unicode_literals, division, absolute_import

	import logging
	import re
	from jsonschema.compat import str_types
	from flexget import plugin
	from flexget.event import event
	from flexget.entry import Entry

	from imdb import IMDb
	from flexget.config_schema import format_checker

	# Module-level logger used by every method of this plugin.
	log = logging.getLogger('smart_imdb')

	# IMDb access object, created once at import time and shared by all SmartIMDB methods.
	ia = IMDb()

	# Values accepted by the ``job_types`` option of the person schema.
	JOB_TYPES = ['actor', 'director', 'producer', 'writer', 'self',
	             'editor', 'miscellaneous', 'editorial department', 'cinematographer',
	             'visual effects', 'thanks', 'music department']
	# Values accepted by the ``content_types`` option of the person schema.
	CONTENT_TYPES = ['movie', 'tv series', 'tv mini series', 'video game', 'video movie', 'tv movie', 'episode']
	# TODO Improve year regex detection
	# Accepted layouts of the ``years`` option, in the order parse_year() expects:
	# 0: 'YYYY'   1: '-YYYY'   2: 'YYYY-'   3: 'YYYY-YYYY'
	# The alternation must be a bare "|": the escaped "\|" matched a literal pipe character,
	# so no real year could ever match.
	YEAR_FORMATS = [
	    r'^((19|20)\d{2})$',
	    r'^\-((19|20)\d{2})$',
	    r'^((19|20)\d{2})\-$',
	    r'^((19|20)\d{2})\-((19|20)\d{2}$)',
	]

	# Bounds used when the ``years`` option leaves one side (or both) open.
	MIN_YEAR_RANGE = 1900
	MAX_YEAR_RANGE = 2099


	class SmartIMDB(object):
	genres_schema = {
	'type': 'object',
	'properties': {
	'genres': {"type": "array", "items": {"type": "string"}},
	'match_type': {'type': 'string', 'enum': ['any', 'all', 'exact'], 'default': 'any'}
	},
	'required': ['genres'],
	'additionalProperties': False
	}

	person_schema = {
	'type': 'object',
	'properties': {
	'name': {'type': 'string'},
	'id': {'type': 'string'},
	'job_types': {'type': 'array', 'items': {'type': 'string', 'enum': JOB_TYPES}, 'default': ['actor']},
	'content_types': {'type': 'array', 'items': {'type': 'string', 'enum': CONTENT_TYPES}, 'default': ['movie']}
	},
	'oneOf': [{'required': ['name']}, {'required': ['id']}],
	'error_oneOf': 'Either a name of ID are required.',
	'additionalProperties': False
	}

	schema = {
	'type': 'object',
	'properties': {
	'person': person_schema,
	'include_genres': genres_schema,
	'exclude_genres': genres_schema,
	'rating': {'type': 'number', 'minimum': 0, 'maximum': 10},
	'votes': {'type': 'number'},
	'years': {'type': 'string', 'format': 'year_format'},
	'max_entries': {'type': 'number', 'default': 200},
	'strict_mode': {'type': 'boolean', 'default': False}
	},
	'required': ['person'],
	'additionalProperties': False

	}

	@staticmethod
	def is_imdb_id(string):
	"""
	Checks if a given string matches IMDB id format
	:param string: Supposed IMDB ID
	:return: True if it matches
	"""
	return re.search('[a-zA-Z]{2}(\d{7})', string)

	def get_all_movies_by_person(self, person):
	"""
	Gets a person object from config and return all movies according to parameters
	:param person:
	:return: Movie list
	"""
	# TODO split this to (at least) two methods
	movies = []

	if person.get('id'):
	if self.is_imdb_id(person.get('id')):
	log.debug('Detected IMDB ID %s, resolving person.' % person.get('id'))
	m = self.is_imdb_id(person.get('id'))
	IMDB_person = ia.get_person(m.group(1))
	else:
	log.error('Could not resolve person from IMDB ID %s' % person.get('id'))
	return
	else:
	log.debug('Trying to search for person: %s' % person.get('name'))
	person_list = ia.search_person(person.get('name'))
	if person_list:
	IMDB_person = ia.get_person(person_list[0].personID)
	else:
	return

	# Special case: Actress and actor are different roles.
	if 'actor' in person.get('job_types'):
	person['job_types'].append('actress')

	log.info('Getting movies for %s with the following job types: %s' %
	(IMDB_person['name'], ' ,'.join(person.get('job_types'))))
	for job_type in person['job_types']:
	for content_type in person.get('content_types'):
	job_and_content = job_type + ' ' + content_type
	log.debug('Searching for movies that correlates to: ' + job_and_content)
	movies_by_job_type = IMDB_person.get(job_and_content, IMDB_person.get(job_type))
	if movies_by_job_type:
	for movie in movies_by_job_type:
	if movie not in movies:
	log.debug('Adding movie: ' + movie.get('title'))
	movies.append(movie)
	else:
	log.debug('Movie ' + str(movie) + ' already found in list, skipping.')
	return movies

	def parse_year(self, year=None):
	"""
	Receives 'year_range_format' string and parses it to return start and end dates
	:param year:
	:return: Tuple with start and end year. Uses max an min year values if either one is needed
	"""
	if not year:
	log.debug('No year filter in config, returning defaults')
	return MIN_YEAR_RANGE, MAX_YEAR_RANGE

	for i in range(len(YEAR_FORMATS)):
	m = re.match(YEAR_FORMATS[i], year)
	if m:
	if i == 0:
	log.debug('Matched year regex group ' + str(i))
	return int(m.group(1)), int(m.group(1))
	elif i == 1:
	log.debug('Matched year regex group ' + str(i))
	return MIN_YEAR_RANGE, int(m.group(1))
	elif i == 2:
	log.debug('Matched year regex group ' + str(i))
	return int(m.group(1)), MAX_YEAR_RANGE
	elif i == 3:
	log.debug('Matched year regex group ' + str(i))
	return int(m.group(1)), int(m.group(3))

	return MIN_YEAR_RANGE, MAX_YEAR_RANGE

	def clean_list(self, genres_list):
	"""
	Gets a list and return a new list with lowercase elements
	:param list:
	:return:
	"""
	if not isinstance(genres_list, list):
	return

	new_list = []
	for item in genres_list:
	new_list.append(str(item).lower())
	return new_list

	def genres_match(self, user_genres, movie_genres):
	"""
	Takes a genres object from config schema and tries to match it with movie genres according to match type.
	If match type is 'any', will return true if any of the user specified genres exists in the movie genres list.
	if match type is 'all' will return true if all of the user specified genres exists in the movie genres list.
	if match type is 'exact' will return true if exactly all of the user specified genres exists in the movie genres list.
	:param user_genres: Genres from config
	:param movie_genres: Movie genres
	:return:
	"""
	if not bool(user_genres):
	return True

	user_genres_list = self.clean_list(user_genres.get('genres'))
	movie_genres_list = self.clean_list(movie_genres)
	match_type = user_genres.get('match_type')
	log.debug('Matching user genres: ' + ', '.join(user_genres_list) + ' with movie genres: '
	+ ', '.join(movie_genres_list) + ' and match type: ' + match_type)

	if match_type == 'any':
	return bool(
	set(user_genres_list) & set(movie_genres_list))
	elif match_type == 'all':
	return not bool(
	set(user_genres_list) - set(movie_genres_list))
	elif match_type == 'exact':
	return not bool(
	set(user_genres_list) ^ set(movie_genres_list))

	@format_checker.checks('year_format', raises=ValueError)
	def is_year_format(instance):
	if not isinstance(instance, str_types):
	return True

	for regex in YEAR_FORMATS:
	m = re.match(regex, instance)
	if m:
	return True
	raise ValueError('Invalid year format, or years out of range of %d to %d. Please check config.'
	% (MIN_YEAR_RANGE, MAX_YEAR_RANGE))

	def on_task_input(self, task, config):
	entries = []

	person = config.get('person')
	include_list = config.get('include_genres')
	exclude_list = config.get('exclude_genres')
	movies = self.get_all_movies_by_person(person)

	if not movies:
	log.error('Could not get movie list, check your configuration.')
	return

	year_range = self.parse_year(config.get('years'))
	log.info('Retrieved %d movies, starting to filter.' % len(movies))

	for movie in movies:
	try:
	ia.update(movie)
	except Exception as e:
	log.error('An error has occurred, cannot get movie data: %s' % e)
	continue

	log.debug(
	'Testing if movie: ' + movie.get('long imdb canonical title') + ' qualifies for adding to entries.')

	type_test = movie.get('kind') in person.get('content_types')
	log.debug('Movie kind: ' + movie.get('kind') + ' found in config types ' +
	', '.join(person.get('content_types')) + ': ' + str(type_test))

	if config.get('rating'):
	if not movie.get('rating'):
	if config.get('strict_mode'):
	log.debug('Strict mode: Movie does not have rating value, skipping movie.')
	rating_test = False
	else:
	log.debug('Movie does not have rating listed, skipping test.')
	rating_test = True
	else:
	rating_test = float(movie.get('rating')) >= config.get('rating', 1)
	log.debug('Movie rating: ' + str(movie.get('rating')) +
	' is higher or equal to: ' + str(config.get('rating', '1')) + ': ' + str(rating_test))
	else:
	log.debug('No rating test required, skipping rating test.')
	rating_test = True

	if config.get('years'):
	if not movie.get('year'):
	if config.get('strict_mode'):
	log.debug('Strict mode: Movie does not have year value, skipping movie.')
	year_test = False
	else:
	log.debug('Movie does not have year listed, skipping test.')
	year_test = True
	else:
	year_test = (movie.get('year') >= year_range[0] and (movie.get('year') <= year_range[1]))
	log.debug('Movie year: ' + str((movie.get('year'))) + ' is in given range of '
	+ str(year_range[0]) + ' and ' + str(year_range[1]) + ': ' + str(year_test))
	else:
	log.debug('No years test required, skipping year test.')
	year_test = True

	if config.get('votes'):
	if not movie.get('votes'):
	if config.get('strict_mode'):
	log.debug('Strict mode: Movie does not have votes value, skipping movie.')
	votes_test = False
	else:
	log.debug('Movie does not have votes listed, skipping test.')
	votes_test = True
	else:
	votes_test = int(movie.get('votes')) >= config.get('votes', 1)
	log.debug('Movie votes: ' + str(movie.get('votes')) +
	' are higher or equal to: ' + str(config.get('votes', '1')) + ': ' + str(votes_test))
	else:
	log.debug('No votes test required, skipping votes test.')
	votes_test = True

	if exclude_list:
	exclude_test = not self.genres_match(exclude_list, movie.get('genres', []))
	log.debug('Exclude genres: ' + ', '.join(
	exclude_list.get('genres', [])) + ' with match type ' + exclude_list.get(
	'match_type') + ' are not found in movie genres: ' + ', '.join(movie.get('genres', [])) +
	': ' + str(exclude_test))
	else:
	log.debug('No genres exclude test required, skipping exclude list test.')
	exclude_test = True

	if include_list:
	include_test = self.genres_match(include_list, movie.get('genres', []))
	log.debug('Include genres: ' + ', '.join(
	include_list.get('genres', [])) + ' with match type ' + include_list.get(
	'match_type') + ' are found in movie genres: ' + ', '.join(movie.get('genres', [])) +
	': ' + str(include_test))
	else:
	log.debug('No genres include test required, skipping include list test.')
	include_test = True

	if type_test and rating_test and year_test and votes_test and exclude_test and include_test:
	entry = Entry(title=movie['title'],
	imdb_id='tt' + ia.get_imdbID(movie),
	url='')
	if entry.isvalid():
	if entry not in entries:
	entries.append(entry)
	if entry and task.options.test:
	log.info("Test mode. Entry includes:")
	log.info(" Title: %s" % entry["title"])
	log.info(" IMDB ID: %s" % entry["imdb_id"])
	else:
	log.error('Invalid entry created? %s' % entry)

	if len(entries) <= config.get('max_entries'):
	return entries
	else:
	log.error(
	'Number of entries (%s) exceeds maximum allowed value %s. '
	'Edit your filters or raise the maximum value by entering a higher "max_entries"' % (
	len(entries), config.get('max_entries')))

	return


	@event('plugin.register')
	def register_plugin():
	    """Register SmartIMDB under the name 'smart_imdb' with api_ver=2; hooked to the 'plugin.register' event."""
	    plugin.register(SmartIMDB, 'smart_imdb', api_ver=2)
No results found