vangheem · May 8, 2017 22:54 · May 7, 2017 · May 6, 2017
diff --git a/tablo-export.py b/tablo-export.py
@@ -50,17 +50,101 @@ async def download_segment(session, output_filename, video_id, seg_name):
     return await resp.read()
 
 
+class Show:
+    """Convenience accessors over one recording's metadata dict.
+
+    ``meta`` is indexed by ``'recEpisode'``, ``'recSeries'`` and
+    ``'recSeason'``; each of those is read through its ``'jsonForClient'``
+    entry, and ``'recEpisode'`` additionally through
+    ``'jsonFromTribune' -> 'program'``.  Most properties prefer the Tribune
+    program data and fall back to the client data when a key is missing.
+    """
+
+    def __init__(self, meta):
+        self.meta = meta
+
+    def _lookup(self, *sources):
+        """Return the first ``(section, key)`` pair that resolves.
+
+        Every pair but the last is tried with ``KeyError`` swallowed (this
+        also covers a ``KeyError`` raised while resolving the section
+        property itself).  The last pair is read unguarded, so its
+        ``KeyError`` propagates to the caller.
+        """
+        *guarded, (last_section, last_key) = sources
+        for section, key in guarded:
+            try:
+                return getattr(self, section)[key]
+            except KeyError:
+                continue
+        return getattr(self, last_section)[last_key]
+
+    # -- raw metadata sections ------------------------------------------
+
+    @property
+    def program(self):
+        tribune = self.meta['recEpisode']['jsonFromTribune']
+        return tribune['program']
+
+    @property
+    def series(self):
+        return self.meta['recSeries']['jsonForClient']
+
+    @property
+    def season(self):
+        return self.meta['recSeason']['jsonForClient']
+
+    @property
+    def episode(self):
+        return self.meta['recEpisode']['jsonForClient']
+
+    # -- derived values -------------------------------------------------
+
+    @property
+    def episode_title(self):
+        """Episode title: program, then season, then episode ``'title'``."""
+        return self._lookup(
+            ('program', 'episodeTitle'),
+            ('season', 'episodeTitle'),
+            ('episode', 'title'),
+        )
+
+    @property
+    def title(self):
+        """Show title: program ``'title'``, else series ``'title'``."""
+        return self._lookup(('program', 'title'), ('series', 'title'))
+
+    @property
+    def episode_number(self):
+        """Episode number as a string zero-padded to two characters."""
+        raw = self._lookup(('program', 'episodeNum'),
+                           ('episode', 'episodeNumber'))
+        return str(raw).zfill(2)
+
+    @property
+    def season_number(self):
+        """Season number as a string zero-padded to two characters."""
+        raw = self._lookup(('program', 'seasonNum'),
+                           ('season', 'seasonNumber'))
+        return str(raw).zfill(2)
+
+    @property
+    def identifier(self):
+        """``sXXeYY`` when season/episode are valid, else the program's ``'seriesId'``."""
+        if not self.valid_season:
+            return self.program['seriesId']
+        return f's{self.season_number}e{self.episode_number}'
+
+    @property
+    def valid_season(self):
+        """True unless the season or the episode number is ``'00'``."""
+        return not (self.season_number == '00' or self.episode_number == '00')
+
+    @property
+    def is_movie(self):
+        """True when there is no valid season and no Tribune data on the episode."""
+        if self.valid_season:
+            return False
+        return 'jsonFromTribune' not in self.meta['recEpisode']
+
+    @property
+    def year(self):
+        """Text before the first ``-`` of the series ``'originalAirDate'``."""
+        air_date = self.series['originalAirDate']
+        return air_date.split('-', 1)[0]
+
+
+
+
 async def download(video_id):
     meta = get_meta(video_id)
     resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
     dom = fromstring(resp.content)
 
     tmp_dir = tempfile.mkdtemp()
-    show = meta['recEpisode']['jsonFromTribune']['program']
-    number = f"s{str(show['seasonNum']).zfill(2)}e{str(show['episodeNum']).zfill(2)}"
-    output_filename = f"{show['title']} - {number} - {show['episodeTitle']}"
+    show = Show(meta)
+
+    if show.is_movie:
+        output_dir = os.path.join(OUTPUT, 'Movies')
+        output_filename = f"{show.title} ({show.year})"
+    else:
+        output_filename = f"{show.title} - {show.identifier} - {show.episode_title}"
+        if show.valid_season:
+            output_dir = os.path.join(OUTPUT, 'TV Shows', show.title,
+                                      f"Season {show.season_number}")
+        else:
+            output_dir = os.path.join(OUTPUT, 'TV Shows', show.title)
+
     ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
-    output_filepath = f'{OUTPUT}/{output_filename}.mp4'
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    output_filepath = f'{output_dir}/{output_filename}.mp4'
 
     meta_dir = os.path.join(OUTPUT, 'meta')
     if not os.path.exists(meta_dir):
@@ -71,7 +155,7 @@ async def download(video_id):
     fi.close()
 
     if os.path.exists(output_filepath):
-        print('Skipping {output_filepath}, already downloaded')
+        print(f'Skipping {output_filepath}, already downloaded')
         return
 
     segments = dom.cssselect('tr td.n')

diff --git a/tablo-export.py b/tablo-export.py
@@ -0,0 +1,124 @@
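+#
+# Requirements:
+#  - Python >= 3.6 (the script uses f-strings)
+#  - requests
+#  - aiohttp
+#  - lxml (with cssselect, needed for dom.cssselect)
+#  - ffmpeg executable on PATH
+#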
+import argparse
+import asyncio
+import json
+import os
+import shutil
+import tempfile
+
+import aiohttp
+import requests
+from lxml.html import fromstring
+
+# Command-line interface.  NOTE: arguments are parsed at import time.
+parser = argparse.ArgumentParser(description='Download all tablo videos.')
+parser.add_argument('--output', default='./videos', dest='output')
+parser.add_argument('--ip', default='192.168.1.43', dest='ip')
+parser.add_argument('--concurrency', default=4, type=int)
+args = parser.parse_args()
+
+
+# Base URL of the device's HTTP interface.
+ENDPOINT = f'http://{args.ip}:18080'
+# Directory that receives the converted videos and the metadata dumps.
+OUTPUT = args.output
+# Total segment-download budget; download() divides it by CONCURRENCY to
+# size each batch of segment requests.
+MAX_SIMULTANEOUS_DOWNLOAD = 32
+# Number of recordings download_all() processes per batch.
+CONCURRENCY = args.concurrency
+
+
+def get_videos():
+    """Return the all-digit link labels found in the ``/pvr`` listing.
+
+    The listing page is fetched synchronously with ``requests``; the text of
+    every ``tr td.n a`` anchor is stripped and kept only when it consists
+    entirely of digits.
+    """
+    listing = requests.get(f'{ENDPOINT}/pvr')
+    anchors = fromstring(listing.content).cssselect('tr td.n a')
+    labels = (anchor.text_content().strip() for anchor in anchors)
+    return [label for label in labels if label.isdigit()]
+
+
+def get_meta(video_id):
+    """Fetch ``meta.txt`` for *video_id* and return it decoded as JSON."""
+    meta_url = f'{ENDPOINT}/pvr/{video_id}/meta.txt'
+    return requests.get(meta_url).json()
+
+
+async def download_segment(session, output_filename, video_id, seg_name):
+    """Download one segment of a recording and return its raw bytes.
+
+    :param session: the HTTP client session used for the GET request
+        (``download`` passes an ``aiohttp.ClientSession``).
+    :param output_filename: only used in the progress message.
+    :param video_id: recording id, part of the segment URL.
+    :param seg_name: segment file name, part of the segment URL.
+    :return: the response body as bytes.
+    """
+    print(f'download segment {seg_name} for {output_filename}')
+    # Use the response as a context manager so the connection is released
+    # even when reading the body fails or the task is cancelled.
+    async with session.get(f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}') as resp:
+        return await resp.read()
+
+
+async def _write_segments(batch, ts_file):
+    """Await every pending segment download in *batch* and append the bytes in order."""
+    for file_chunk in await asyncio.gather(*batch):
+        ts_file.write(file_chunk)
+
+
+async def download(video_id):
+    """Download all segments of one recording and convert them to an MP4.
+
+    Steps:
+      1. Fetch the metadata and dump it as JSON under ``OUTPUT/meta``.
+      2. Return early if the target ``.mp4`` already exists.
+      3. Download the segments in batches into a temporary ``.ts`` file.
+      4. Run ``ffmpeg`` to write the final ``.mp4``.
+
+    The temporary directory is always removed, including on errors.
+
+    NOTE(review): ``get_meta`` and ``requests.get`` are synchronous calls
+    made from inside a coroutine.
+
+    :param video_id: recording id as returned by ``get_videos``.
+    :raises KeyError: if the metadata lacks the Tribune program fields
+        used to build the file name.
+    """
+    meta = get_meta(video_id)
+
+    show = meta['recEpisode']['jsonFromTribune']['program']
+    number = f"s{str(show['seasonNum']).zfill(2)}e{str(show['episodeNum']).zfill(2)}"
+    output_filename = f"{show['title']} - {number} - {show['episodeTitle']}"
+    output_filepath = f'{OUTPUT}/{output_filename}.mp4'
+
+    # makedirs also creates OUTPUT itself, which a plain mkdir would not.
+    meta_dir = os.path.join(OUTPUT, 'meta')
+    os.makedirs(meta_dir, exist_ok=True)
+    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
+    with open(meta_filepath, 'w') as fi:
+        fi.write(json.dumps(meta))
+
+    if os.path.exists(output_filepath):
+        print(f'Skipping {output_filepath}, already downloaded')
+        return
+
+    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
+    segments = fromstring(resp.content).cssselect('tr td.n')
+    batch_size = MAX_SIMULTANEOUS_DOWNLOAD / CONCURRENCY
+
+    # Created only once we know there is work to do, so a skipped
+    # recording no longer leaves an empty temp directory behind.
+    tmp_dir = tempfile.mkdtemp()
+    try:
+        ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
+
+        async with aiohttp.ClientSession() as session:
+            with open(ts_filepath, 'ab') as ts_file:
+                batch = []
+                for count, seg in enumerate(segments, start=1):
+                    seg_name = seg.text_content()
+                    if seg_name in ('Parent Directory/',):
+                        continue
+                    batch.append(
+                        download_segment(session, output_filename, video_id, seg_name))
+
+                    if len(batch) >= batch_size:
+                        await _write_segments(batch, ts_file)
+                        print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
+                        batch = []
+                # Flush whatever is left of the last, partial batch.
+                await _write_segments(batch, ts_file)
+
+        cmd = [
+            'ffmpeg', '-y', '-i', ts_filepath,
+            '-vcodec', 'h264', '-acodec', 'aac', '-strict', '-2',
+            '-c', 'copy', f'{output_filepath}']
+        print(f'Executing: {" ".join(cmd)}')
+        process = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE)
+        await process.communicate()
+        if process.returncode != 0:
+            # A failed conversion may leave a partial file that later runs
+            # would treat as "already downloaded"; make that visible.
+            print(f'ffmpeg exited with code {process.returncode} for {output_filepath}')
+    finally:
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+async def download_all():
+    """Download every recording listed by ``get_videos``.
+
+    Recordings are processed in groups: ``download`` coroutines are
+    collected until there are at least ``CONCURRENCY`` of them, the group
+    is awaited with ``asyncio.gather``, and collection starts over.  The
+    remainder is awaited at the end.
+    """
+    pending = []
+    for video_id in get_videos():
+        pending.append(download(video_id))
+        if len(pending) < CONCURRENCY:
+            continue
+        await asyncio.gather(*pending)
+        pending = []
+    # Remainder that did not fill a whole group (possibly empty).
+    await asyncio.gather(*pending)
+
+
+if __name__ == '__main__':
+    # Script entry point: run download_all() to completion on the event loop.
+    asyncio.get_event_loop().run_until_complete(download_all())
No results found