Skip to content

Instantly share code, notes, and snippets.

@StarkGang
Last active December 20, 2025 04:05
Show Gist options
  • Select an option

  • Save StarkGang/ad3c71f70097206fc81c0fd7958fe367 to your computer and use it in GitHub Desktop.

Select an option

Save StarkGang/ad3c71f70097206fc81c0fd7958fe367 to your computer and use it in GitHub Desktop.

Revisions

  1. StarkGang revised this gist Nov 24, 2022. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion urls.txt
    Original file line number Diff line number Diff line change
    @@ -1 +0,0 @@
    https://www.youtube.com/watch?v=dQw4w9WgXcQ
  2. StarkGang renamed this gist Nov 24, 2022. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  3. StarkGang created this gist Nov 24, 2022.
    191 changes: 191 additions & 0 deletions main.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,191 @@
    import contextlib
    import datetime
    import http.client as httplib
    import logging
    import multiprocessing
    import os
    import sys
    import threading
    import time
    import traceback
    from concurrent.futures import as_completed
    from concurrent.futures.thread import ThreadPoolExecutor

    import youtube_dl
    from apscheduler.schedulers.background import BlockingScheduler
    from dateutil.tz import tzlocal


    class Helper:
        """Input-parsing helpers for the interactive prompts of this script."""

        # Normalised answers that give_out_bool treats as "yes".
        _AFFIRMATIVE = (1, 'yes', 'ya', 'yeh', 'yep', 'y', 'ok', 'haan')

        def __init__(self) -> None:
            pass

        def try_returning_a_int(self, arg: str):
            """Return ``int(arg)`` when it converts, else ``str(arg).lower()``.

            Non-string values that ``int`` rejects (e.g. ``None``) also take
            the lower-cased string path instead of raising ``TypeError``.
            """
            try:
                return int(arg)
            except (TypeError, ValueError):
                return str(arg).lower()

        def strip_time_from_input(self, arg: str):
            """Parse a ``HH:MM:AM/PM`` string into a 24-hour ``(hour, minute)``.

            Surrounding whitespace is ignored. Returns ``(None, None)`` when
            *arg* is not a string or does not match the format. Midnight
            parses to hour ``0``, so callers must test ``hour is None`` rather
            than truthiness to detect a failed parse.
            """
            if not isinstance(arg, str):
                return None, None
            try:
                parsed = datetime.datetime.strptime(arg.strip(), '%I:%M:%p')
            except ValueError:
                return None, None
            return parsed.hour, parsed.minute

        def is_interactive_shell(self):
            """Return True when the process's original stdin is a terminal."""
            stdin = sys.__stdin__
            # sys.__stdin__ can be None when the process has no console.
            return stdin is not None and stdin.isatty()

        def give_out_bool(self, decision):
            """Return True when *decision* is ``1`` or one of the accepted "yes" words.

            String answers are stripped of surrounding whitespace and compared
            case-insensitively; anything else yields False.
            """
            if isinstance(decision, str):
                decision = decision.strip()
            return self.try_returning_a_int(decision) in self._AFFIRMATIVE

    class Downloader:
        """Download every URL listed in a text file with youtube_dl on a thread pool.

        URLs are read from ``./urls.txt`` by default and each one is removed
        from that file once its download succeeds.
        """

        # Number of extract attempts made for one URL before giving up.
        MAX_ATTEMPTS = 5
        # Guards the URL file's read-modify-write cycle and the task counter,
        # both of which are touched from several worker threads at once.
        _lock = threading.Lock()

        def __init__(self) -> None:
            self.urls = []
            self.task_len = 0
            self.GOOGLE_DNS = "8.8.8.8"
            self.executor = ThreadPoolExecutor(max_workers=multiprocessing.cpu_count() * 2)

        def logging_setup(self):
            """Configure root logging to ``rogue-dl.log`` and the console."""
            logging.basicConfig(
                level=logging.INFO,
                datefmt="[%d/%m/%Y %H:%M:%S]",
                format="%(asctime)s - [Rogue-Downloader] >> %(levelname)s << %(message)s",
                handlers=[logging.FileHandler("rogue-dl.log"), logging.StreamHandler()],
            )
            # Keep the scheduler's own INFO chatter out of the log.
            logging.getLogger("apscheduler").setLevel(logging.WARNING)

        @staticmethod
        def log(
            message: str = None,
            level=logging.INFO,
            logger: logging.Logger = logging.getLogger(__module__),
        ) -> str:
            """Log *message* at *level* and return the text that was logged.

            When *message* is empty or None, the current traceback text
            (``traceback.format_exc()``) is logged instead.
            """
            text = message or traceback.format_exc()
            logger.log(level, text)
            return text

        def rem_link(self, link, file_path: str = './urls.txt'):
            """Remove every line equal to *link* from the URL file.

            Lines are compared with surrounding whitespace stripped, so a
            URL matches regardless of its trailing newline. The file is only
            rewritten when something was actually removed.

            Raises:
                OSError: if the file cannot be read or written.
            """
            target = link.strip()
            if not target:
                # An empty link would otherwise match (and delete) blank lines.
                return
            with self._lock:
                with open(file_path, 'r') as readable:
                    content = readable.readlines()
                kept = [line for line in content if line.strip() != target]
                if len(kept) != len(content):
                    with open(file_path, 'w') as writeable:
                        writeable.writelines(kept)

        def read_file_return_list(self, file_path: str = './urls.txt'):
            """Load the non-blank lines of *file_path* into ``self.urls`` and return them.

            Raises:
                OSError: if the file cannot be opened.
            """
            with open(file_path) as file_obj:
                urls = [line.strip() for line in file_obj if line.strip()]
            self.urls = urls
            return urls

        def downloader(self):
            """Wait for connectivity, load the URL list and download every URL.

            Blocks (polling every 5 seconds) until ``data_check`` succeeds,
            then submits one ``download`` task per URL to the thread pool and
            logs a success or failure line as each task finishes.
            """
            self.log('Checking For Data Connection...')
            while not self.data_check():
                self.log('Data Connection Not Found.. Sleeping for 5s and retrying..')
                time.sleep(5)
            self.log("Connection initiated... Starting Program.")
            self.log(f'Function Called at : {time.time()}')
            time_st = time.perf_counter()
            logging.info('Fetching URL(s) from the file and storing in :memory: \n')
            self.read_file_return_list()
            logging.info(f'Above Task | [Completed in {round(time.perf_counter() - time_st)}s] \n')
            logging.info('Begining new download process in few seconds :\n ')
            futures = [self.executor.submit(self.download, url) for url in self.urls]
            # Number results in completion order; the shared task counter is
            # bumped by all workers and says nothing about this future.
            for task_no, future in enumerate(as_completed(futures), start=1):
                try:
                    succeeded = future.result()
                except Exception:
                    # One crashed worker must not abort the remaining results.
                    self.log(level=logging.ERROR)
                    succeeded = False
                if succeeded:
                    self.log(f'Download Success : #{task_no}')
                else:
                    self.log(f'Download Failed : #{task_no}')
            logging.info(f'Above Task | [Completed in {round(time.perf_counter() - time_st)}s] \n')

        def download(self, url):
            """Download *url* with youtube_dl, retrying up to ``MAX_ATTEMPTS`` times.

            On success the URL is removed from the URL file.

            Returns:
                True once an attempt succeeds, False when every attempt failed.
            """
            opts = {
                'format': 'best',
                'addmetadata': True,
                'key': 'FFmpegMetadata',
                'prefer_ffmpeg': True,
                'geo_bypass': True,
                'nocheckcertificate': True,
                'outtmpl': './%(playlist_title)s/%(title)s.mp4',
                'postprocessors': [{
                    'key': 'FFmpegVideoConvertor',
                    'preferedformat': 'mp4'
                }],
                'logtostderr': False,
            }
            with self._lock:
                self.task_len += 1
            client = youtube_dl.YoutubeDL(opts)
            for attempt in range(1, self.MAX_ATTEMPTS + 1):
                try:
                    ei = client.extract_info(url)
                except Exception as e:
                    self.log(f'#{attempt} - An error was raised : {url} \nException : {e}')
                    continue
                info = ei or {}
                self.log(f"Downloaded : {info.get('title')} from {info.get('uploader')} with url {url}")
                try:
                    self.rem_link(url)
                except OSError as e:
                    # The download itself succeeded; a bookkeeping failure
                    # must not trigger another download of the same URL.
                    self.log(f'Could not remove {url} from the url list : {e}', level=logging.WARNING)
                return True
            return False

        def data_check(self):
            """Return True when a HEAD request can be sent to ``self.GOOGLE_DNS`` over HTTPS.

            Any exception while connecting or sending counts as "no
            connection". The connection is always closed afterwards.
            """
            init_connection = httplib.HTTPSConnection(self.GOOGLE_DNS, timeout=5)
            try:
                init_connection.request("HEAD", "/")
                return True
            except Exception:
                return False
            finally:
                init_connection.close()

        # The annotation is quoted so it is not evaluated when the class is defined.
        def shut_down(self, scheduler: "BlockingScheduler"):
            """Remove all scheduled jobs, stop *scheduler* and terminate the process.

            The process is ended with ``os._exit(0)`` so that running worker
            threads are killed too; this call never returns.
            """
            scheduler.remove_all_jobs()
            scheduler.shutdown(False)
            logging.warning('Scheduled exit : Shutting Down all threads forcefully..')
            os._exit(0)  # exit all threads


    def _ask_for_time(helper, log, prompt, give_up_message, max_tries=5):
        """Prompt for a ``HH:MM:AM/PM`` time until one parses or tries run out.

        Returns ``(hour, minute)`` on success. After *max_tries* invalid
        answers, logs *give_up_message* and returns ``(None, None)``.
        """
        for attempt in range(max_tries):
            if attempt:
                log('Try again. Please Give Valid Input.')
            hour, minute = helper.strip_time_from_input(input(prompt))
            # Hour 0 (12:xx AM) is a valid answer, so compare against None.
            if hour is not None:
                return hour, minute
        log(give_up_message)
        return None, None


    dl_class = Downloader()
    dl_class.logging_setup()
    helper_class = Helper()
    scheduler = BlockingScheduler(timezone=tzlocal())
    # Only ask about scheduling when there is a terminal to answer on.
    should_use_sch = helper_class.is_interactive_shell() and helper_class.give_out_bool(
        input('Do you wish to schedule this or run now? (Y/n) :\n')
    )
    if should_use_sch:
        hour, minute = _ask_for_time(
            helper_class,
            dl_class.log,
            "Alright give me time input in the format : HH:MM:AM/PM. Example : 12:00:AM :\n",
            'Alright, Max input recived.. Try again later! Byee...',
        )
        if hour is not None:
            scheduler.add_job(dl_class.downloader, trigger="cron", hour=hour, minute=minute)
            should_exit_script = helper_class.give_out_bool(input("Should exit the process at specficied time? Don't worry you can always resume download process... (y/n) : \n"))
            if should_exit_script:
                exit_hour, exit_minute = _ask_for_time(
                    helper_class,
                    dl_class.log,
                    "Alright give me time input in the format : HH:MM:AM/PM. Example : 12:00:AM \n:",
                    'Alright, Max input recived.. Not scheduling a exit..',
                )
                if exit_hour is not None:
                    scheduler.add_job(dl_class.shut_down, args=[scheduler], trigger="cron", hour=exit_hour, minute=exit_minute)
            logging.info('Script has been loaded all tasks has been scheduled respectively!')
            # Blocks here and runs the scheduled jobs.
            scheduler.start()
    else:
        # No schedule requested, or no terminal to ask on: download right away.
        dl_class.downloader()
    logging.info("All Tasks has been Terminated / completed.")
    1 change: 1 addition & 0 deletions urls.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    https://www.youtube.com/watch?v=dQw4w9WgXcQ