@jpadilla
Forked from jokull/s3_sync.py
Created April 1, 2013 12:47
Revisions

  1. @jokull created this gist Nov 22, 2012.
s3_sync.py

# encoding=utf-8

import sys
import datetime
import email.utils
import mimetypes
import os
import time
import gzip
import subprocess

from cStringIO import StringIO

from boto.s3.connection import S3Connection

s3 = S3Connection()
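# Note: with no arguments, S3Connection() above lets boto discover
# credentials on its own, e.g. from the AWS_ACCESS_KEY_ID /
# AWS_SECRET_ACCESS_KEY environment variables or a ~/.boto config file.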

# Only these text types are worth gzipping; everything else uploads as-is.
GZIP_CONTENT_TYPES = (
    'text/css',
    'application/javascript',
)

GZIP_SIZE_MIN = 1024  # Below ~1 KB the gzip overhead outweighs the savings

EXCLUDE_FILENAMES = ('.DS_Store', '.git')

HEADERS = {
    # HTTP/1.0
    'Expires': email.utils.formatdate(
        time.mktime((datetime.datetime.now() +
                     datetime.timedelta(days=365 * 2)).timetuple()),
        usegmt=True),
    # HTTP/1.1
    'Cache-Control': 'max-age=%d' % (3600 * 24 * 365 * 2),
}
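# A two-year lifetime is safe because every upload lands under a
# checksum-derived prefix (see main() below): when any file changes, the
# URLs change too, so stale cached copies are never served.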


def main():
    try:
        media_root, bucket_root = sys.argv[1:]
    except ValueError:
        sys.exit(u"Error. (Hint: python s3_sync.py public/ s3.bucket.com/static )")

    if '/' in bucket_root:
        bucket_name, prefix = bucket_root.split("/", 1)
    else:
        bucket_name, prefix = bucket_root, ''

    bucket = s3.get_bucket(bucket_name)

    # Checksum the whole tree (tar c dir | md5) so the upload prefix is
    # unique per content. `md5` is the BSD/macOS command; on Linux use
    # `md5sum` instead.
    process = subprocess.Popen('tar c %s | md5' % media_root,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               shell=True)
    media_root_md5, stderr = process.communicate()
    if stderr:
        raise Exception(u'Could not get unique folder checksum')

    s3_root = os.path.join(
        prefix,
        media_root_md5[:6],
    )

    if not media_root.endswith("/"):
        # We want to copy the folder as a whole, not just its contents - like rsync
        s3_root = os.path.join(s3_root, media_root)

    s3_root = s3_root.rstrip("/")  # Normalize

    print "Uploading to //s3.amazonaws.com/%s/%s/" % (bucket_name, s3_root)

    for root, dirs, files in os.walk(media_root):
        # Prune excluded directories in-place so os.walk skips their
        # contents too (e.g. everything under .git/).
        dirs[:] = [d for d in dirs if d not in EXCLUDE_FILENAMES]
        for filename in files:
            if filename in EXCLUDE_FILENAMES:
                continue  # e.g. .DS_Store

            path = os.path.join(root, filename)
            s3_path = os.path.join(os.path.relpath(root, media_root), filename)
            s3_path = os.path.normpath(os.path.join(s3_root, s3_path))

            content_type, _ = mimetypes.guess_type(s3_path)
            byte_length = os.stat(path).st_size
            headers = HEADERS.copy()
            key = bucket.new_key(s3_path)

            with open(path, 'rb') as fp:

                if content_type in GZIP_CONTENT_TYPES and byte_length > GZIP_SIZE_MIN:
                    headers['Content-Encoding'] = 'gzip'
                    compressed = StringIO()
                    with gzip.GzipFile(fileobj=compressed, mode='wb',
                                       compresslevel=9) as gzip_fp:
                        gzip_fp.write(fp.read())
                    contents = compressed.getvalue()

                else:
                    contents = fp.read()

            if content_type:
                headers['Content-Type'] = content_type

            if os.environ.get('DRYRUN') == "true":
                for header_name, header_value in headers.items():
                    print "%s: %s" % (header_name, header_value)
                print s3_path
                print

            else:
                key.set_contents_from_string(
                    contents, headers, replace=True, policy='public-read')
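
# boto 2's Key.set_contents_from_string() uploads the body and headers in a
# single PUT; policy='public-read' applies the canned ACL so the uploaded
# objects are world-readable.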


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.exit(u"Early exit")