Last active
January 21, 2019 10:39
-
-
Save joaoescribano/b9c34e7a07fb450ae7747e582dd4e6ba to your computer and use it in GitHub Desktop.
Revisions
-
joaoescribano created this gist
Jan 21, 2019 .There are no files selected for viewing
#!/usr/bin/env python3
"""Save a full-page screenshot of a URL using a headless Selenium browser.

Usage: <script> URL OUTPUT_PATH
"""
import datetime
import math
import os
import sys
import tempfile

# third-party imports
from PIL import Image
from selenium import webdriver
from time import sleep


def get_chrome_drive(driver_path=None):
    """Create a headless Chrome WebDriver.

    :param driver_path: path to the chromedriver executable; when None,
        '/usr/bin/chromedriver' is used.
    :return: the started ``webdriver.Chrome`` instance.
    """
    if driver_path is None:
        driver_path = '/usr/bin/chromedriver'

    options = webdriver.ChromeOptions()
    options.headless = True
    # Keep scrollbars out of the captured tiles.
    options.add_argument('--hide-scrollbars')
    options.add_argument('--no-sandbox')

    # NOTE(review): `executable_path` / `chrome_options` are the keyword names
    # this script was written against; newer Selenium releases may expect a
    # Service object and `options=` instead -- confirm for the installed version.
    return webdriver.Chrome(
        executable_path=driver_path,
        chrome_options=options,
        # Add e.g. '--log-path=<file>' and '--verbose' here to debug the driver.
        service_args=[],
    )


def get_firefox_drive(driver_path=None):
    """Create a headless Firefox WebDriver.

    :param driver_path: path to the geckodriver executable; when None,
        '/usr/bin/geckodriver' is used.
    :return: the started ``webdriver.Firefox`` instance.
    """
    if driver_path is None:
        driver_path = '/usr/bin/geckodriver'

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')

    # NOTE(review): `executable_path` / `firefox_options` may be rejected by
    # newer Selenium releases -- confirm for the installed version.
    return webdriver.Firefox(
        executable_path=driver_path,
        firefox_options=options,
    )


def _capture_tiles(driver, paths, window_height):
    """Screenshot the current page one viewport at a time into ``paths``."""
    for index, path in enumerate(paths):
        if index > 0:
            # The browser clamps the final scroll at the bottom of the page,
            # so the last tile may overlap the previous one; the stitcher
            # crops that overlap away.
            driver.execute_script('window.scrollBy(%d,%d)' % (0, window_height))
        driver.save_screenshot(path)


def _stitch_tiles(paths, window_height, scroll_height, output_path):
    """Paste the tile images top-to-bottom into one image and save it."""
    canvas = None
    total_height = 0
    offset_y = 0
    last_index = len(paths) - 1

    for index, path in enumerate(paths):
        # Close each tile promptly so the temp file can be removed afterwards.
        with Image.open(path) as tile:
            width, height = tile.size

            if canvas is None:
                # Work in image pixels: a screenshot may be larger than the
                # CSS viewport when the device pixel ratio is not 1.
                scale = float(height) / float(window_height)
                total_height = max(1, int(round(scroll_height * scale)))
                canvas = Image.new('RGB', (width, total_height))

            piece = tile
            if index == last_index and index > 0:
                remaining = total_height - offset_y
                if remaining <= 0:
                    break
                if remaining < height:
                    # Only the bottom `remaining` rows of the last tile are
                    # new content. When the page height is an exact multiple
                    # of the tile height nothing is cropped.
                    piece = tile.crop((0, height - remaining, width, height))

            canvas.paste(piece, (0, offset_y))
            offset_y += height

    if canvas is None:
        raise RuntimeError('no screenshots were captured')
    canvas.save(output_path)


def save_fullpage_screenshot(driver, url, output_path, tmp_prefix='selenium_screenshot', tmp_suffix='.png'):
    """
    Creates a full page screenshot using a selenium driver by scrolling and
    taking multiple screenshots, and stitching them into a single image.

    :param driver: a Selenium WebDriver used to load and capture the page.
    :param url: address of the page to capture.
    :param output_path: file path the stitched image is saved to.
    :param tmp_prefix: filename prefix for the temporary per-viewport images.
    :param tmp_suffix: filename suffix for the temporary per-viewport images.
    :return: ``output_path``.
    :raises RuntimeError: if the browser reports no usable page dimensions.
    """
    # get the page
    driver.get(url)

    # get dimensions
    window_height = driver.execute_script('return window.innerHeight')
    scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    if not window_height or window_height < 0 or not scroll_height or scroll_height < 0:
        raise RuntimeError(
            'could not determine page dimensions '
            '(window height: {0!r}, scroll height: {1!r})'.format(window_height, scroll_height)
        )

    num = int(math.ceil(float(scroll_height) / float(window_height)))

    tempfiles = []
    try:
        # Create the temp files inside the try so a failure part-way through
        # still cleans up the ones already created.
        for i in range(num):
            fd, path = tempfile.mkstemp(prefix='{0}-{1:02}-'.format(tmp_prefix, i + 1), suffix=tmp_suffix)
            tempfiles.append(path)
            os.close(fd)

        _capture_tiles(driver, tempfiles, window_height)
        _stitch_tiles(tempfiles, window_height, scroll_height, output_path)
    finally:
        # cleanup
        for path in tempfiles:
            if os.path.isfile(path):
                os.remove(path)

    return output_path


def main():
    """Command-line entry point: capture argv[1] (URL) into argv[2] (file)."""
    if len(sys.argv) < 3:
        sys.exit('usage: {0} URL OUTPUT_PATH'.format(sys.argv[0]))

    url = sys.argv[1]
    filename = sys.argv[2]

    driver = get_chrome_drive()
    try:
        driver.set_window_size(1280, 768)
        save_fullpage_screenshot(driver, url, filename)
    finally:
        # Always shut the browser down, even if the capture failed.
        driver.quit()


if __name__ == '__main__':
    main()