#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Download files of the spacenet dataset.

This script is a thin wrapper around the "axel" command line downloader:
every s3 path is joined to a root url (option -r) and fetched into a local
directory tree that mirrors the s3 path. The "axel" executable must be
available on the PATH.

An earlier variant relied on the "aws s3api get-object" command, which
requires a configured AWS cli; that command is kept as a comment in
download_file(). See pages:
https://aws.amazon.com/public-datasets/spacenet/
http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html
"""

from __future__ import print_function

import argparse
import os
import subprocess

try:  # Python 3
    from urllib.parse import urljoin
except ImportError:  # Python 2
    from urlparse import urljoin


def _normalize_s3_path(s3_path):
    """Return s3_path without surrounding whitespace and leading './', '../', '/'."""
    path = s3_path.strip()
    # Remove whole leading prefixes only. str.lstrip('./') would strip a
    # *set of characters* and therefore also eat the dot of names such as
    # '.hidden.txt'.
    while True:
        if path.startswith('./'):
            path = path[2:]
        elif path.startswith('../'):
            path = path[3:]
        elif path.startswith('/'):
            path = path[1:]
        else:
            break
    # A bare '.' or '..' designates a directory, never a downloadable file.
    if path in ('.', '..'):
        return ''
    return path


def _build_url(rooturl, s3_path):
    """Return the url of s3_path located below rooturl."""
    # urljoin() replaces the last segment of a base url that does not end
    # with '/', which would silently drop the last directory of rooturl.
    if not rooturl.endswith('/'):
        rooturl += '/'
    return urljoin(rooturl, s3_path)


def download_file(s3_path, rooturl):
    """
    Download a single file located at s3_path below rooturl.

    s3_path must be valid, a similar directory tree will be created locally
    (relative to the current working directory). The manifest.txt file
    provides the list of paths. You can download it with the option:
    -p manifest.txt

    Nothing is downloaded when s3_path looks like a directory (it contains
    no '.' or ends with '/') or when the local file already exists.
    Failures are printed and not raised, so that one bad path does not abort
    a whole selection. Always returns None.
    """
    s3_path = _normalize_s3_path(s3_path)

    # s3_path is a file only if it contains a '.' and is not a directory key
    if '.' not in s3_path or s3_path.endswith('/'):
        return

    # if a file already exists do nothing (no download)
    if os.path.isfile(s3_path):
        print('skip file: "{}", because it already exists'.format(s3_path))
        return

    # create the directory (and parents) for the new file
    dirname = os.path.dirname(s3_path)
    if dirname and not os.path.isdir(dirname):
        print('creating directory: {}'.format(dirname))
        try:
            os.makedirs(dirname)
        except OSError as e:
            print('Exception: {}'.format(e))

    # download the file with axel, using 10 connections
    try:
        print('downloading file: {} ...'.format(s3_path))
        # former alternative, based on the AWS cli:
        # command = ['aws', 's3api', 'get-object',
        #            '--bucket', 'spacenet-dataset',
        #            '--request-payer', 'requester',
        #            '--key', s3_path, s3_path]
        command = ["axel", "-n", "10", _build_url(rooturl, s3_path),
                   "--output", s3_path]
        subprocess.check_call(command)
    except Exception as e:
        # best effort: report the failure (missing axel executable, non-zero
        # exit status, invalid url, ...) and let the caller continue
        print('Exception: {}'.format(e))


def download_selection(selection_file_path, rooturl):
    """
    Download every file listed in the selection file.

    The selection file must contain an s3 path on each line (as in the
    manifest file), for example:
    ./manifest.txt
    ./AOI_1_Rio/srcData/mosaic_8band/013022223121.tif

    Each line is handed to download_file() together with rooturl.
    """
    with open(selection_file_path, 'r') as selection_file:
        for s3_path in selection_file:
            download_file(s3_path, rooturl)


def main(args=None):
    """
    Parse the arguments, and call a download function.

    args is an argv-like sequence whose first item is the program name
    (as sys.argv); when it is None, sys.argv is used. Returns 0; invalid
    command lines exit through parser.error().

    usage: spacenet_download.py [-h] -r ROOTURL [-p PATH | -s SELECTION]

    optional arguments:
      -h, --help            show this help message and exit
      -r ROOTURL, --rooturl ROOTURL
                            root url that each s3 path is joined to
      -p PATH, --path PATH  a single s3 path to be downloaded
      -s SELECTION, --selection SELECTION
                            path to a selection file. All s3 paths written in
                            this file will be downloaded
    """
    parser = argparse.ArgumentParser()

    # -r goes with -p or -s, so it must live outside the exclusive group
    parser.add_argument("-r", "--rooturl",
                        help="root url that each s3 path is joined to")

    group = parser.add_mutually_exclusive_group()
    group.add_argument("-p", "--path",
                       help="a single s3 path to be downloaded")
    group.add_argument("-s", "--selection",
                       help="path to a selection file. All s3 paths written in this " +
                            "file will be downloaded")

    # honour the argv handed in by the caller instead of always re-reading
    # sys.argv; the first item is the program name and is dropped
    argv = None if args is None else list(args)[1:]
    options = parser.parse_args(argv)

    if not options.rooturl:
        parser.error("-r is mandatory")

    if options.path:
        download_file(options.path, options.rooturl)
    elif options.selection:
        download_selection(options.selection, options.rooturl)
    else:
        parser.error('Use the -p or -s option, for example: -p manifest.txt')

    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv))