Last active
May 8, 2017 22:54
-
-
Save vangheem/d76837fbd813ed7ebebf84a3c24e504d to your computer and use it in GitHub Desktop.
Export all your Tablo videos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Requirements:
#  - Python >= 3.6 (f-strings are used throughout)
#  - requests
#  - aiohttp
#  - lxml (with the cssselect package, for parsing the directory listings)
#
| import argparse | |
| import asyncio | |
| import json | |
| import os | |
| import shutil | |
| import tempfile | |
| import aiohttp | |
| import requests | |
| from lxml.html import fromstring | |
# Command-line configuration: where to write files, which Tablo to talk to,
# and how many videos to process at once.
parser = argparse.ArgumentParser(description='Download all tablo videos.')
parser.add_argument('--output', dest='output', default='./videos')
parser.add_argument('--ip', dest='ip', default='192.168.1.43')
parser.add_argument('--concurrency', type=int, default=4)
args = parser.parse_args()

# The Tablo device serves recordings over plain HTTP on port 18080.
ENDPOINT = f'http://{args.ip}:18080'
OUTPUT = args.output
CONCURRENCY = args.concurrency
# Upper bound on in-flight segment requests, shared across concurrent videos.
MAX_SIMULTANEOUS_DOWNLOAD = 32
def get_videos():
    """Scrape the Tablo /pvr index page and return the recording ids.

    Recordings appear in the directory listing as purely numeric link
    names; anything non-numeric (e.g. parent-directory links) is ignored.
    """
    listing = requests.get(f'{ENDPOINT}/pvr')
    page = fromstring(listing.content)
    names = (a.text_content().strip() for a in page.cssselect('tr td.n a'))
    return [name for name in names if name.isdigit()]
def get_meta(video_id):
    """Fetch and decode the JSON metadata for a single recording id."""
    url = f'{ENDPOINT}/pvr/{video_id}/meta.txt'
    return requests.get(url).json()
async def download_segment(session, output_filename, video_id, seg_name):
    """Download one .ts segment of a recording and return its raw bytes.

    :param session: shared aiohttp client session.
    :param output_filename: human-readable name, used only for progress output.
    :param video_id: numeric recording id on the Tablo device.
    :param seg_name: segment file name under /pvr/<id>/segs/.
    """
    print(f'download segment {seg_name} for {output_filename}')
    # FIX: use the response as an async context manager so the connection is
    # released back to the pool even if the read fails (the original never
    # released the response).
    async with session.get(f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}') as resp:
        return await resp.read()
async def download(video_id):
    """Download all segments of one recording, join them, and convert to mp4.

    Segments are fetched concurrently in batches, appended in order to a
    temporary .ts file, then handed to ffmpeg to produce the final file in
    OUTPUT. The recording's raw metadata is saved under OUTPUT/meta/ even
    for videos that are skipped as already downloaded (preserving the
    original behavior of refreshing metadata on every run).
    """
    meta = get_meta(video_id)
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
    dom = fromstring(resp.content)

    show = meta['recEpisode']['jsonFromTribune']['program']
    number = f"s{str(show['seasonNum']).zfill(2)}e{str(show['episodeNum']).zfill(2)}"
    output_filename = f"{show['title']} - {number} - {show['episodeTitle']}"
    output_filepath = f'{OUTPUT}/{output_filename}.mp4'

    meta_dir = os.path.join(OUTPUT, 'meta')
    # FIX: makedirs with exist_ok also creates OUTPUT itself if missing;
    # the original os.mkdir() raised when OUTPUT did not exist yet.
    os.makedirs(meta_dir, exist_ok=True)
    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
    with open(meta_filepath, 'w') as fi:
        fi.write(json.dumps(meta))

    if os.path.exists(output_filepath):
        # FIX: original printed the literal '{output_filepath}' because the
        # string was missing its f-prefix.
        print(f'Skipping {output_filepath}, already downloaded')
        return

    # FIX: create the scratch dir only after the skip check, so skipped
    # videos no longer leak a temp directory per run.
    tmp_dir = tempfile.mkdtemp()
    ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
    try:
        segments = dom.cssselect('tr td.n')
        batch = []
        count = 0

        async def _flush_batch():
            # Gather the pending segment coroutines and append the chunks
            # to the .ts file in submission order (gather preserves order).
            with open(ts_filepath, 'ab') as fi:
                for file_chunk in await asyncio.gather(*batch):
                    fi.write(file_chunk)

        # FIX: async-with guarantees the session is closed even if a
        # download raises (the original leaked it on any exception).
        async with aiohttp.ClientSession() as session:
            for seg in segments:
                count += 1
                seg_name = seg.text_content()
                if seg_name in ('Parent Directory/',):
                    continue
                batch.append(
                    download_segment(session, output_filename, video_id, seg_name))
                if len(batch) >= (MAX_SIMULTANEOUS_DOWNLOAD / CONCURRENCY):
                    await _flush_batch()
                    print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
                    batch = []
            # Flush the final, possibly partial, batch.
            await _flush_batch()

        # NOTE(review): '-c copy' overrides the -vcodec/-acodec flags, so
        # ffmpeg remuxes without re-encoding; kept as-is to preserve the
        # original behavior.
        cmd = [
            'ffmpeg', '-y', '-i', ts_filepath,
            '-vcodec', 'h264', '-acodec', 'aac', '-strict', '-2',
            '-c', 'copy', f'{output_filepath}']
        print(f'Executing: {" ".join(cmd)}')
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE)
        await process.communicate()
    finally:
        # FIX: remove the scratch directory even when a download or the
        # ffmpeg step fails.
        shutil.rmtree(tmp_dir)
async def download_all():
    """Process every recording on the device, CONCURRENCY videos at a time."""
    pending = []
    for video_id in get_videos():
        pending.append(download(video_id))
        if len(pending) < CONCURRENCY:
            continue
        await asyncio.gather(*pending)
        pending = []
    # Run whatever remains from the last partial batch.
    await asyncio.gather(*pending)
if __name__ == '__main__':
    # Entry point: run the full export on the event loop.
    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(download_all())
    finally:
        # FIX: close the loop so pending transport/cleanup callbacks run
        # before the interpreter exits (the original never closed it).
        event_loop.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment