Last active
April 5, 2021 06:55
-
-
Save RogerRordo/937d37a6bd19c20f1ce094cfe4fb48ea to your computer and use it in GitHub Desktop.
20201212-临时解决youtube_dl无法下载自动字幕的问题
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replacement for the _get_automatic_captions method in
# youtube-dl/extractor/youtube.py.
def _get_automatic_captions(self, video_id, webpage):
    """Build automatic caption entries from an already-downloaded page.

    The caption base URL is read from the ``ytInitialPlayerResponse`` JSON
    object embedded in ``webpage``; the page is passed in as an argument so
    it does not have to be fetched again.

    Args:
        video_id: Video id, used in log messages and when parsing the JSON.
        webpage: Text of the page to search for the player response.

    Returns:
        A dict mapping every language code found under
        ``translationLanguages`` to a list of ``{'url': ..., 'ext': ...}``
        dicts, one per entry of ``self._SUBTITLE_FORMATS``.  An empty dict
        is returned, after a warning has been reported, when the player
        response or the caption data inside it cannot be found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id

    def not_found():
        # Single exit used by every "no captions" path.
        self._downloader.report_warning(err_msg)
        return {}

    def make_captions(sub_url, sub_langs):
        # Derive one URL per (language, format) pair from the base caption
        # URL by overriding the 'tlang' and 'fmt' query parameters.
        parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
        base_qs = compat_parse_qs(parsed_sub_url.query)
        captions = {}
        for sub_lang in sub_langs:
            sub_formats = []
            for ext in self._SUBTITLE_FORMATS:
                # Work on a copy so the parsed base query is never mutated.
                caption_qs = dict(base_qs)
                caption_qs.update({
                    'tlang': [sub_lang],
                    'fmt': [ext],
                })
                sub_formats.append({
                    'url': compat_urlparse.urlunparse(
                        parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(
                                caption_qs, True))),
                    'ext': ext,
                })
            captions[sub_lang] = sub_formats
        return captions

    # NOTE(review): the capture group is non-greedy, so it ends at the first
    # '};' after the opening brace.  A '};' inside a JSON string value would
    # truncate the capture; confirm whether a stricter end anchor is needed.
    pattern = r'var\s*ytInitialPlayerResponse\s*=\s*({.+?});'
    response = self._search_regex(
        pattern, webpage, 'yt initial player response', default=None)
    if not response:
        return not_found()

    yt_initial_player_response = self._parse_json(
        uppercase_escape(response), video_id, fatal=False)
    if not yt_initial_player_response:
        return not_found()

    try:
        # New captions format as of 22.06.2017
        renderer = yt_initial_player_response['captions'][
            'playerCaptionsTracklistRenderer']
        base_url = renderer['captionTracks'][0]['baseUrl']
        sub_lang_list = []
        for lang in renderer['translationLanguages']:
            lang_code = lang.get('languageCode')
            if lang_code:
                sub_lang_list.append(lang_code)
        return make_captions(base_url, sub_lang_list)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles.  TypeError and
    # AttributeError cover fields that are present but null or of an
    # unexpected type, which are treated the same as missing fields.
    except (KeyError, IndexError, TypeError, AttributeError, ExtractorError):
        return not_found()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
youtube-dl 升级到 2020.12.14 后字幕又无法下载，后来发现安装目录变了，需要修改的是下面这个目录中的文件：
Python36\Lib\site-packages\youtube_dl-2020.12.14-py3.6.egg\youtube_dl\extractor\youtube.py
另:ydl两个字幕不能下载的帖子不知为何被锁定,无法交流