Skip to content

Instantly share code, notes, and snippets.

@vangheem
Last active May 8, 2017 22:54
Show Gist options
  • Select an option

  • Save vangheem/d76837fbd813ed7ebebf84a3c24e504d to your computer and use it in GitHub Desktop.

Select an option

Save vangheem/d76837fbd813ed7ebebf84a3c24e504d to your computer and use it in GitHub Desktop.
Export all your tablo videos
#
# Requirements:
# - Python >= 3.5
# - requests
# - aiohttp
#
import argparse
import asyncio
import json
import os
import shutil
import tempfile
import aiohttp
import requests
from lxml.html import fromstring
# Command-line interface: where to write files, which tablo to talk to,
# and how many recordings to process concurrently.
parser = argparse.ArgumentParser(description='Download all tablo videos.')
parser.add_argument('--output', default='./videos')
parser.add_argument('--ip', default='192.168.1.43')
parser.add_argument('--concurrency', type=int, default=4)
args = parser.parse_args()

# Base URL of the tablo's HTTP interface (port 18080 serves /pvr listings).
ENDPOINT = 'http://{}:18080'.format(args.ip)
OUTPUT = args.output
# Global cap on in-flight segment requests, split across concurrent downloads.
MAX_SIMULTANEOUS_DOWNLOAD = 32
CONCURRENCY = args.concurrency
def get_videos():
    """Return the ids of every recording listed under /pvr on the tablo.

    The /pvr page is an Apache-style directory index; recording ids are
    the all-digit anchor texts in the name column.
    """
    listing = requests.get(f'{ENDPOINT}/pvr')
    tree = fromstring(listing.content)
    return [
        anchor.text_content().strip()
        for anchor in tree.cssselect('tr td.n a')
        if anchor.text_content().strip().isdigit()
    ]
def get_meta(video_id):
    """Fetch and decode the JSON metadata for one recording id."""
    return requests.get(f'{ENDPOINT}/pvr/{video_id}/meta.txt').json()
async def download_segment(session, output_filename, video_id, seg_name):
    """Fetch a single .ts segment of a recording and return its raw bytes.

    output_filename is only used for progress logging.
    """
    print(f'download segment {seg_name} for {output_filename}')
    url = f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}'
    response = await session.get(url)
    return await response.read()
async def download(video_id):
    """Download one recording: fetch all its .ts segments, concatenate
    them, and transcode the result to mp4 with ffmpeg.

    video_id: numeric id string as listed under /pvr on the tablo.
    Side effects: writes <OUTPUT>/<name>.mp4 and <OUTPUT>/meta/<name>.json.
    Skips (and returns early) if the mp4 already exists.
    """
    meta = get_meta(video_id)
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
    dom = fromstring(resp.content)
    show = meta['recEpisode']['jsonFromTribune']['program']
    number = f"s{str(show['seasonNum']).zfill(2)}e{str(show['episodeNum']).zfill(2)}"
    output_filename = f"{show['title']} - {number} - {show['episodeTitle']}"
    output_filepath = f'{OUTPUT}/{output_filename}.mp4'

    # Check for an existing export *before* creating the temp dir so the
    # skip path leaks nothing (the original leaked one temp dir per skip).
    if os.path.exists(output_filepath):
        # Bug fix: original printed the literal text '{output_filepath}'
        # because the f-prefix was missing.
        print(f'Skipping {output_filepath}, already downloaded')
        return

    meta_dir = os.path.join(OUTPUT, 'meta')
    os.makedirs(meta_dir, exist_ok=True)  # race-free vs. bare os.mkdir
    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
    with open(meta_filepath, 'w') as fi:
        json.dump(meta, fi)

    tmp_dir = tempfile.mkdtemp()
    ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
    try:
        segments = dom.cssselect('tr td.n')
        batch = []
        count = 0

        async def flush_batch():
            # Gather the pending segment downloads and append the chunks,
            # in request order, to the .ts file.
            if not batch:
                return
            with open(ts_filepath, 'ab') as out:
                for file_chunk in await asyncio.gather(*batch):
                    out.write(file_chunk)
            print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
            batch.clear()

        # async with guarantees the session closes even if a fetch raises
        # (the original left it open on any exception).
        async with aiohttp.ClientSession() as session:
            for seg in segments:
                count += 1
                seg_name = seg.text_content()
                if seg_name in ('Parent Directory/',):
                    continue
                batch.append(
                    download_segment(session, output_filename, video_id, seg_name))
                # Share the global in-flight cap across concurrent downloads.
                if len(batch) >= (MAX_SIMULTANEOUS_DOWNLOAD / CONCURRENCY):
                    await flush_batch()
            await flush_batch()

        cmd = [
            'ffmpeg', '-y', '-i', ts_filepath,
            '-vcodec', 'h264', '-acodec', 'aac', '-strict', '-2',
            '-c', 'copy', f'{output_filepath}']
        print(f'Executing: {" ".join(cmd)}')
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE)
        await process.communicate()
    finally:
        # Always remove the temp dir, even when a download or ffmpeg fails.
        shutil.rmtree(tmp_dir)
async def download_all():
    """Export every recording on the tablo, CONCURRENCY at a time."""
    pending = []
    for video_id in get_videos():
        pending.append(download(video_id))
        if len(pending) >= CONCURRENCY:
            await asyncio.gather(*pending)
            pending = []
    # Flush whatever is left over from the last partial batch.
    await asyncio.gather(*pending)
if __name__ == '__main__':
    # Run the whole export to completion on the default event loop.
    loop = asyncio.get_event_loop()
    loop.run_until_complete(download_all())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment