Last active
May 8, 2017 22:54
-
-
Save vangheem/d76837fbd813ed7ebebf84a3c24e504d to your computer and use it in GitHub Desktop.
Export all your Tablo videos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Requirements:
#  - Python >= 3.6 (f-strings are used throughout)
#  - requests
#  - aiohttp
#  - lxml (with the cssselect package, for parsing the directory listings)
#
| import argparse | |
| import asyncio | |
| import json | |
| import os | |
| import shutil | |
| import tempfile | |
| import aiohttp | |
| import requests | |
| from lxml.html import fromstring | |
# Command-line configuration: where to write files, which Tablo to talk to,
# and how many videos to process at once.
parser = argparse.ArgumentParser(description='Download all tablo videos.')
parser.add_argument('--output', dest='output', default='./videos')
parser.add_argument('--ip', dest='ip', default='192.168.1.43')
parser.add_argument('--concurrency', type=int, default=4)
args = parser.parse_args()

# The Tablo device serves recordings over plain HTTP on port 18080.
ENDPOINT = f'http://{args.ip}:18080'
OUTPUT = args.output
CONCURRENCY = args.concurrency
# Upper bound on in-flight segment requests, shared across concurrent videos.
MAX_SIMULTANEOUS_DOWNLOAD = 32
def get_videos():
    """Scrape the Tablo /pvr index page and return the recording ids.

    Recordings appear in the directory listing as purely numeric link
    names; anything non-numeric (e.g. parent-directory links) is ignored.
    """
    listing = requests.get(f'{ENDPOINT}/pvr')
    page = fromstring(listing.content)
    names = (a.text_content().strip() for a in page.cssselect('tr td.n a'))
    return [name for name in names if name.isdigit()]
def get_meta(video_id):
    """Fetch and decode the JSON metadata for a single recording id."""
    url = f'{ENDPOINT}/pvr/{video_id}/meta.txt'
    return requests.get(url).json()
async def download_segment(session, output_filename, video_id, seg_name):
    """Download one .ts segment of a recording and return its raw bytes.

    :param session: shared aiohttp client session.
    :param output_filename: human-readable name, used only for progress output.
    :param video_id: numeric recording id on the Tablo device.
    :param seg_name: segment file name under /pvr/<id>/segs/.
    """
    print(f'download segment {seg_name} for {output_filename}')
    # FIX: use the response as an async context manager so the connection is
    # released back to the pool even if the read fails (the original never
    # released the response).
    async with session.get(f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}') as resp:
        return await resp.read()
async def download(video_id):
    """Download all segments of one recording, join them, and convert to mp4.

    Segments are fetched concurrently in batches, appended in order to a
    temporary .ts file, then handed to ffmpeg to produce the final file in
    OUTPUT. The recording's raw metadata is saved under OUTPUT/meta/ even
    for videos that are skipped as already downloaded (preserving the
    original behavior of refreshing metadata on every run).
    """
    meta = get_meta(video_id)
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
    dom = fromstring(resp.content)

    show = meta['recEpisode']['jsonFromTribune']['program']
    number = f"s{str(show['seasonNum']).zfill(2)}e{str(show['episodeNum']).zfill(2)}"
    output_filename = f"{show['title']} - {number} - {show['episodeTitle']}"
    output_filepath = f'{OUTPUT}/{output_filename}.mp4'

    meta_dir = os.path.join(OUTPUT, 'meta')
    # FIX: makedirs with exist_ok also creates OUTPUT itself if missing;
    # the original os.mkdir() raised when OUTPUT did not exist yet.
    os.makedirs(meta_dir, exist_ok=True)
    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
    with open(meta_filepath, 'w') as fi:
        fi.write(json.dumps(meta))

    if os.path.exists(output_filepath):
        # FIX: original printed the literal '{output_filepath}' because the
        # string was missing its f-prefix.
        print(f'Skipping {output_filepath}, already downloaded')
        return

    # FIX: create the scratch dir only after the skip check, so skipped
    # videos no longer leak a temp directory per run.
    tmp_dir = tempfile.mkdtemp()
    ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
    try:
        segments = dom.cssselect('tr td.n')
        batch = []
        count = 0

        async def _flush_batch():
            # Gather the pending segment coroutines and append the chunks
            # to the .ts file in submission order (gather preserves order).
            with open(ts_filepath, 'ab') as fi:
                for file_chunk in await asyncio.gather(*batch):
                    fi.write(file_chunk)

        # FIX: async-with guarantees the session is closed even if a
        # download raises (the original leaked it on any exception).
        async with aiohttp.ClientSession() as session:
            for seg in segments:
                count += 1
                seg_name = seg.text_content()
                if seg_name in ('Parent Directory/',):
                    continue
                batch.append(
                    download_segment(session, output_filename, video_id, seg_name))
                if len(batch) >= (MAX_SIMULTANEOUS_DOWNLOAD / CONCURRENCY):
                    await _flush_batch()
                    print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
                    batch = []
            # Flush the final, possibly partial, batch.
            await _flush_batch()

        # NOTE(review): '-c copy' overrides the -vcodec/-acodec flags, so
        # ffmpeg remuxes without re-encoding; kept as-is to preserve the
        # original behavior.
        cmd = [
            'ffmpeg', '-y', '-i', ts_filepath,
            '-vcodec', 'h264', '-acodec', 'aac', '-strict', '-2',
            '-c', 'copy', f'{output_filepath}']
        print(f'Executing: {" ".join(cmd)}')
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE)
        await process.communicate()
    finally:
        # FIX: remove the scratch directory even when a download or the
        # ffmpeg step fails.
        shutil.rmtree(tmp_dir)
async def download_all():
    """Process every recording on the device, CONCURRENCY videos at a time."""
    pending = []
    for video_id in get_videos():
        pending.append(download(video_id))
        if len(pending) < CONCURRENCY:
            continue
        await asyncio.gather(*pending)
        pending = []
    # Run whatever remains from the last partial batch.
    await asyncio.gather(*pending)
if __name__ == '__main__':
    # Entry point: run the full export on the event loop.
    event_loop = asyncio.get_event_loop()
    try:
        event_loop.run_until_complete(download_all())
    finally:
        # FIX: close the loop so pending transport/cleanup callbacks run
        # before the interpreter exits (the original never closed it).
        event_loop.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment