Created
April 2, 2013 20:53
-
-
Save twslankard/5296081 to your computer and use it in GitHub Desktop.
Revisions
-
Tom Slankard created this gist
Apr 2, 2013 .There are no files selected for viewing
#!/usr/bin/env ruby
#
# Resumable multipart upload of a single local file to Amazon S3.
#
# Usage: <this script> FILE BUCKET KEY
#
# If exactly one multipart upload is already in progress for KEY, it is
# resumed: every part whose remote ETag matches the MD5 of the corresponding
# local byte range is skipped. Otherwise a new multipart upload is started.

require 'rubygems'
require 'digest/md5'
require 'aws-sdk'

# Smallest part size used, in bytes (S3's documented minimum for non-final parts).
MIN_PART_SIZE = 5 * 1024 * 1024
# Largest number of parts the file is split into (S3's documented per-upload limit).
MAX_PARTS = 10_000
# How many times each part is attempted before giving up.
UPLOAD_TRIES = 3

# Removes the first and last character of an ETag. The ETags seen by this
# script are wrapped in double quotes, which must go before comparing them
# with a hex MD5 digest.
def unquotedEtag(etag)
  etag[1..-2]
end

# Reads one part of the file and computes its MD5.
#
# file_name - path of the local file
# file_size - total size of the file in bytes
# part_size - size of every part except possibly the last, in bytes
# part      - 1-based part number
#
# Returns [part_contents, part_md5] where part_md5 is a hex digest string.
# Raises RuntimeError if fewer bytes than expected could be read (for
# example because the file shrank after its size was taken).
def readPart(file_name, file_size, part_size, part)
  file_offset = part_size * (part - 1)
  bytes_to_read = [part_size, file_size - file_offset].min

  # Binary mode keeps byte offsets consistent with File.size on every platform.
  part_contents = File.open(file_name, 'rb') do |file|
    file.seek(file_offset)
    file.read(bytes_to_read)
  end

  if part_contents.nil? || part_contents.bytesize != bytes_to_read
    raise "short read for part #{part} of #{file_name}"
  end

  [part_contents, Digest::MD5.hexdigest(part_contents)]
end

# Uploads one part and verifies that the ETag reported back equals the
# locally computed MD5.
#
# Raises RuntimeError ("part_md5 mismatch!") when the two differ.
def uploadPart(part_contents, part_md5, part, upload)
  result_obj = upload.add_part(part_contents, :part_number => part)
  raise 'part_md5 mismatch!' if unquotedEtag(result_obj.etag) != part_md5
end

# Decides whether a part still has to be sent.
#
# Returns false only when the upload already holds this part number with an
# ETag equal to part_md5; returns true otherwise.
def shouldUploadPart(part, part_md5, upload)
  unquotedEtag(upload.parts[part].etag) != part_md5
rescue StandardError
  # Any failure while looking the part up is treated as "not uploaded yet"
  # (presumably that is how a missing part surfaces -- verify against the SDK).
  true
end

# Calls uploadPart up to `tries` times, returning after the first success.
#
# Raises RuntimeError carrying the last error's message once every attempt
# has failed (or when `tries` is less than 1 and nothing was attempted).
def uploadPartWithRetry(part_contents, part_md5, part, upload, tries)
  the_exception = nil
  tries.times do
    begin
      uploadPart(part_contents, part_md5, part, upload)
      return
    rescue StandardError => e
      the_exception = e
    end
  end

  reason = the_exception ? the_exception.message : 'no attempts were made'
  raise 'too many retries, error: ' + reason
end

# Walks over all parts in order, uploading those that are missing or differ
# remotely, printing progress, and finally completing the multipart upload
# from the parts present on the remote side.
def uploadParts(file_name, file_size, part_size, number_of_parts, upload)
  skipped_parts = 0
  start_time = Time.now

  (1..number_of_parts).each do |part|
    part_contents, part_md5 = readPart(file_name, file_size, part_size, part)

    if shouldUploadPart(part, part_md5, upload)
      uploadPartWithRetry(part_contents, part_md5, part, upload, UPLOAD_TRIES)

      # Average over parts actually sent in this run; skipped parts cost
      # almost nothing and would make the estimate too optimistic.
      elapsed_time = Time.now - start_time
      average_part_time = elapsed_time / (part - skipped_parts)
      estimated_time_remaining =
        (average_part_time * (number_of_parts - part) / 60.0).ceil
      puts "Uploaded #{part}/#{number_of_parts} parts. \n" \
           "Estimated time remaining: #{estimated_time_remaining} minutes"
    else
      skipped_parts += 1
      puts "Skipping already uploaded part #{part}/#{number_of_parts}."
    end
  end

  upload.complete(:remote_parts)
end

# Uploads file_name to key_name in bucket_name, resuming an in-progress
# multipart upload for that key when exactly one exists.
#
# Raises ArgumentError for an empty file and RuntimeError when more than one
# multipart upload is already in progress for the key.
def main(file_name, bucket_name, key_name)
  # Inspect the local file first so that a missing or empty file does not
  # leave a freshly created multipart upload behind.
  file_size = File.size(file_name)
  raise ArgumentError, "#{file_name} is empty; nothing to upload" if file_size.zero?

  s3 = AWS::S3.new
  object = s3.buckets[bucket_name].objects[key_name]

  # List the pending uploads once and reuse the result.
  pending_uploads = object.multipart_uploads.to_a
  # haven't decided what to do here yet
  raise 'multiple uploads in progress' if pending_uploads.size > 1

  upload = pending_uploads.first || object.multipart_upload

  # Integer ceiling divisions: exact for any file size.
  part_size = [(file_size + MAX_PARTS - 1) / MAX_PARTS, MIN_PART_SIZE].max
  number_of_parts = (file_size + part_size - 1) / part_size

  puts "Uploading #{file_size} byte file as #{number_of_parts} chunks each up to #{part_size} bytes."
  uploadParts(file_name, file_size, part_size, number_of_parts, upload)
  puts 'Done!'
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.length < 3
    abort "Usage: #{File.basename($PROGRAM_NAME)} FILE BUCKET KEY"
  end

  main(ARGV[0], ARGV[1], ARGV[2])
end