Last active
May 8, 2017 22:54
-
-
Save vangheem/d76837fbd813ed7ebebf84a3c24e504d to your computer and use it in GitHub Desktop.
Revisions
-
vangheem revised this gist
May 7, 2017 · 1 changed file with 89 additions and 5 deletions. There are no files selected for viewing
class Show:
    """Read-only accessor over one recording's ``meta.txt`` JSON document.

    Every property reads from ``self.meta``. Most values are looked up first
    in the ``jsonFromTribune`` program record and then in the ``jsonForClient``
    records; the first source that has the key wins.
    """

    def __init__(self, meta):
        self.meta = meta

    @property
    def program(self):
        """The ``recEpisode.jsonFromTribune.program`` record (KeyError if absent)."""
        return self.meta['recEpisode']['jsonFromTribune']['program']

    @property
    def series(self):
        """The ``recSeries.jsonForClient`` record (KeyError if absent)."""
        return self.meta['recSeries']['jsonForClient']

    @property
    def season(self):
        """The ``recSeason.jsonForClient`` record (KeyError if absent)."""
        return self.meta['recSeason']['jsonForClient']

    @property
    def episode(self):
        """The ``recEpisode.jsonForClient`` record (KeyError if absent)."""
        return self.meta['recEpisode']['jsonForClient']

    def _first_number(self, *sources):
        """Return the first available number as a zero-padded string.

        ``sources`` is a sequence of ``(property_name, key)`` pairs tried in
        order. A source is skipped when the record or key is missing, or when
        the stored value is ``None``. If nothing is found, ``'00'`` is
        returned: that is the sentinel ``valid_season`` compares against, so
        a recording without season/episode data is reported as "no valid
        season" instead of raising ``KeyError``.
        """
        for section, key in sources:
            try:
                value = getattr(self, section)[key]
            except KeyError:
                continue
            if value is not None:
                return str(value).zfill(2)
        return '00'

    @property
    def episode_title(self):
        """Episode title from the program, season, or episode record.

        Raises ``KeyError`` when none of the three records provides one.
        """
        try:
            return self.program['episodeTitle']
        except KeyError:
            try:
                return self.season['episodeTitle']
            except KeyError:
                return self.episode['title']

    @property
    def title(self):
        """Show title from the program record, falling back to the series."""
        try:
            return self.program['title']
        except KeyError:
            return self.series['title']

    @property
    def episode_number(self):
        """Two-digit episode number, or ``'00'`` when it is unavailable."""
        return self._first_number(
            ('program', 'episodeNum'),
            ('episode', 'episodeNumber'),
        )

    @property
    def season_number(self):
        """Two-digit season number, or ``'00'`` when it is unavailable."""
        return self._first_number(
            ('program', 'seasonNum'),
            ('season', 'seasonNumber'),
        )

    @property
    def identifier(self):
        """``sNNeMM`` when season and episode are valid, else the series id."""
        if self.valid_season:
            return f's{self.season_number}e{self.episode_number}'
        else:
            return self.program['seriesId']

    @property
    def valid_season(self):
        """True when neither the season nor the episode number is ``'00'``."""
        return self.season_number != '00' and self.episode_number != '00'

    @property
    def is_movie(self):
        """True when there is no valid season and no ``jsonFromTribune`` data."""
        return not self.valid_season and 'jsonFromTribune' not in self.meta['recEpisode']

    @property
    def year(self):
        """Leading ``-``-separated field of the series ``originalAirDate``."""
        return self.series['originalAirDate'].split('-')[0]


async def _append_segments(ts_filepath, batch):
    """Await every pending segment download and append the bytes in order."""
    chunks = await asyncio.gather(*batch)
    with open(ts_filepath, 'ab') as fi:
        for file_chunk in chunks:
            fi.write(file_chunk)


async def download(video_id):
    """Download one recording and remux it to an ``.mp4`` under ``OUTPUT``.

    Movies are written to ``OUTPUT/Movies``; everything else goes to
    ``OUTPUT/TV Shows/<title>[/Season NN]``. The recording's metadata is
    always written to ``OUTPUT/meta``. If the target ``.mp4`` already exists
    the recording is skipped.
    """
    meta = get_meta(video_id)
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
    dom = fromstring(resp.content)

    show = Show(meta)
    if show.is_movie:
        output_dir = os.path.join(OUTPUT, 'Movies')
        output_filename = f"{show.title} ({show.year})"
    else:
        output_filename = f"{show.title} - {show.identifier} - {show.episode_title}"
        if show.valid_season:
            output_dir = os.path.join(OUTPUT, 'TV Shows', show.title,
                                      f"Season {show.season_number}")
        else:
            output_dir = os.path.join(OUTPUT, 'TV Shows', show.title)

    os.makedirs(output_dir, exist_ok=True)
    output_filepath = f'{output_dir}/{output_filename}.mp4'

    meta_dir = os.path.join(OUTPUT, 'meta')
    os.makedirs(meta_dir, exist_ok=True)
    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
    with open(meta_filepath, 'w') as fi:
        fi.write(json.dumps(meta))

    if os.path.exists(output_filepath):
        print(f'Skipping {output_filepath}, already downloaded')
        return

    segments = dom.cssselect('tr td.n')
    # Number of segment requests allowed in flight for this one recording.
    batch_limit = MAX_SIMULTANEOUS_DOWNLOAD / max(CONCURRENCY, 1)

    # The scratch directory is created only once we know there is work to do,
    # and removed in ``finally`` so skipped or failed runs leave nothing behind.
    tmp_dir = tempfile.mkdtemp()
    ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
    try:
        async with aiohttp.ClientSession() as session:
            batch = []
            for count, seg in enumerate(segments, start=1):
                seg_name = seg.text_content()
                if seg_name in ('Parent Directory/',):
                    continue
                batch.append(download_segment(
                    session, output_filename, video_id, seg_name))
                if len(batch) >= batch_limit:
                    await _append_segments(ts_filepath, batch)
                    print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
                    batch = []
            # Flush whatever is left over from the last partial batch.
            await _append_segments(ts_filepath, batch)

        # NOTE(review): the trailing '-c copy' presumably overrides the
        # '-vcodec'/'-acodec' choices (stream copy) -- confirm intent.
        cmd = [
            'ffmpeg', '-y', '-i', ts_filepath, '-vcodec', 'h264', '-acodec', 'aac',
            '-strict', '-2', '-c', 'copy', f'{output_filepath}']
        print(f'Executing: {" ".join(cmd)}')
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE)
        await process.communicate()
        if process.returncode != 0:
            print(f'ffmpeg exited with status {process.returncode} for {output_filepath}')
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
vangheem created this gist
May 6, 2017. There are no files selected for viewing
#
# Requirements:
#   - Python >= 3.6 (the script uses f-strings)
#   - requests
#   - aiohttp
#   - lxml with cssselect support
#
import argparse
import asyncio
import json
import os
import shutil
import tempfile

import aiohttp
import requests
from lxml.html import fromstring


parser = argparse.ArgumentParser(description='Download all tablo videos.')
parser.add_argument('--output', dest='output', default='./videos')
parser.add_argument('--ip', dest='ip', default='192.168.1.43')
parser.add_argument('--concurrency', type=int, default=4)
args = parser.parse_args()

# Base URL of the device's HTTP listing, built from --ip.
ENDPOINT = 'http://{}:18080'.format(args.ip)
# Directory that receives the .mp4 files and the meta/ sub-directory.
OUTPUT = args.output
# Overall cap on in-flight segment requests; each recording gets
# MAX_SIMULTANEOUS_DOWNLOAD / CONCURRENCY of them.
MAX_SIMULTANEOUS_DOWNLOAD = 32
# Number of recordings processed at the same time.
CONCURRENCY = args.concurrency


def get_videos():
    """Return the all-digit entries linked from the ``/pvr`` listing."""
    resp = requests.get(f'{ENDPOINT}/pvr')
    dom = fromstring(resp.content)
    names = (anchor.text_content().strip()
             for anchor in dom.cssselect('tr td.n a'))
    return [name for name in names if name.isdigit()]


def get_meta(video_id):
    """Fetch and decode the ``meta.txt`` JSON document of one recording."""
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/meta.txt')
    return resp.json()


async def download_segment(session, output_filename, video_id, seg_name):
    """Download a single segment and return its raw bytes."""
    print(f'download segment {seg_name} for {output_filename}')
    async with session.get(f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}') as resp:
        return await resp.read()


async def _append_segments(ts_filepath, batch):
    """Await every pending segment download and append the bytes in order."""
    chunks = await asyncio.gather(*batch)
    with open(ts_filepath, 'ab') as fi:
        for file_chunk in chunks:
            fi.write(file_chunk)


async def download(video_id):
    """Download one recording and remux it to ``OUTPUT/<name>.mp4``.

    The recording's metadata is always written to ``OUTPUT/meta``. If the
    target ``.mp4`` already exists the recording is skipped.
    """
    meta = get_meta(video_id)
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
    dom = fromstring(resp.content)

    show = meta['recEpisode']['jsonFromTribune']['program']
    number = f"s{str(show['seasonNum']).zfill(2)}e{str(show['episodeNum']).zfill(2)}"
    output_filename = f"{show['title']} - {number} - {show['episodeTitle']}"
    output_filepath = f'{OUTPUT}/{output_filename}.mp4'

    # makedirs also creates OUTPUT itself, which os.mkdir could not do when
    # the default './videos' directory did not exist yet.
    meta_dir = os.path.join(OUTPUT, 'meta')
    os.makedirs(meta_dir, exist_ok=True)
    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
    with open(meta_filepath, 'w') as fi:
        fi.write(json.dumps(meta))

    if os.path.exists(output_filepath):
        print(f'Skipping {output_filepath}, already downloaded')
        return

    segments = dom.cssselect('tr td.n')
    # Number of segment requests allowed in flight for this one recording.
    batch_limit = MAX_SIMULTANEOUS_DOWNLOAD / max(CONCURRENCY, 1)

    # The scratch directory is created only once we know there is work to do,
    # and removed in ``finally`` so skipped or failed runs leave nothing behind.
    tmp_dir = tempfile.mkdtemp()
    ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
    try:
        async with aiohttp.ClientSession() as session:
            batch = []
            for count, seg in enumerate(segments, start=1):
                seg_name = seg.text_content()
                if seg_name in ('Parent Directory/',):
                    continue
                batch.append(download_segment(
                    session, output_filename, video_id, seg_name))
                if len(batch) >= batch_limit:
                    await _append_segments(ts_filepath, batch)
                    print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
                    batch = []
            # Flush whatever is left over from the last partial batch.
            await _append_segments(ts_filepath, batch)

        # NOTE(review): the trailing '-c copy' presumably overrides the
        # '-vcodec'/'-acodec' choices (stream copy) -- confirm intent.
        cmd = [
            'ffmpeg', '-y', '-i', ts_filepath, '-vcodec', 'h264', '-acodec', 'aac',
            '-strict', '-2', '-c', 'copy', f'{output_filepath}']
        print(f'Executing: {" ".join(cmd)}')
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE)
        await process.communicate()
        if process.returncode != 0:
            print(f'ffmpeg exited with status {process.returncode} for {output_filepath}')
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)


async def download_all():
    """Download every listed recording, ``CONCURRENCY`` at a time."""
    videos = get_videos()
    batch = []
    for video in videos:
        batch.append(download(video))
        if len(batch) >= CONCURRENCY:
            await asyncio.gather(*batch)
            batch = []
    # Run the final, possibly partial, batch.
    await asyncio.gather(*batch)


if __name__ == '__main__':
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(download_all())