Skip to content

Instantly share code, notes, and snippets.

@auyer
Created August 20, 2020 16:35
Show Gist options
  • Select an option

  • Save auyer/f7defc2420111c34fe81dda74efaf176 to your computer and use it in GitHub Desktop.

Select an option

Save auyer/f7defc2420111c34fe81dda74efaf176 to your computer and use it in GitHub Desktop.

Revisions

  1. auyer created this gist Aug 20, 2020.
    89 changes: 89 additions & 0 deletions cuncurrenlty_delete_S3files.go
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,89 @@
    package main

    import (
    "fmt"
    "sync"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/awserr"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3"
    )

    // This script lists every object in a folder (or sharing a key prefix), page by page, and runs one batch delete per page.
    // To avoid rate-limit errors, the batch delete requests are issued by a pool of 15 workers.

    // Deletion target and shared state. Edit the first three values to point at
    // the objects you want to delete.
    var (
    	// bucketName is the S3 bucket used for both listing and deleting.
    	bucketName = "your-S3-bucket-name"

    	// s3Region is the AWS region the S3 client is configured with.
    	s3Region = "us-east-1"

    	// prefixOrFolder is the listing prefix: either a folder name or any key
    	// prefix shared by every file you want to delete.
    	prefixOrFolder = "folder-name"

    	// wg counts the pages handed to the workers that are not yet processed.
    	wg sync.WaitGroup
    )

    // worker receives pages of listed objects from jobs and deletes each page from
    // bucketName with one batch DeleteObjects request. It calls wg.Done once per
    // page received, whether or not the deletion succeeded, and returns when jobs
    // is closed and drained.
    //
    // id is only used to label this worker's log output.
    func worker(id int, svc *s3.S3, jobs <-chan []*s3.Object) {
    	// Format the id as decimal text once; string(id) would instead yield the
    	// character with that code point.
    	sid := fmt.Sprintf("%d", id)
    	for list := range jobs {
    		fmt.Println("worker " + sid + " got job")
    		deleteBatch(sid, svc, list)
    		fmt.Println("worker " + sid + " got result")
    		wg.Done()
    	}
    }

    // deleteBatch deletes the objects in list from bucketName in a single request
    // and prints any failure, prefixed with the worker label sid. Failures are
    // reported, not returned, so one bad page does not stop the worker.
    func deleteBatch(sid string, svc *s3.S3, list []*s3.Object) {
    	// A Delete request without any object is rejected by S3, so skip empty pages.
    	if len(list) == 0 {
    		return
    	}

    	// Build the request payload: only the key of each listed object is needed.
    	objList := make([]*s3.ObjectIdentifier, 0, len(list))
    	for _, obj := range list {
    		objList = append(objList, &s3.ObjectIdentifier{Key: obj.Key})
    	}

    	out, err := svc.DeleteObjects(&s3.DeleteObjectsInput{
    		Bucket: aws.String(bucketName),
    		Delete: &s3.Delete{
    			Objects: objList,
    			Quiet:   aws.Bool(false),
    		},
    	})
    	if err != nil {
    		if aerr, ok := err.(awserr.Error); ok {
    			// Label the line with the service error code so it is easy to filter on.
    			fmt.Println("worker ERROR " + sid + " (" + aerr.Code() + "): " + aerr.Error())
    		} else {
    			fmt.Println("worker ERROR " + sid + ": " + err.Error())
    		}
    		return
    	}

    	// The request as a whole can succeed while individual keys fail; those
    	// failures are only visible in the Errors list of the response.
    	for _, e := range out.Errors {
    		fmt.Println("worker ERROR " + sid + ": key " + aws.StringValue(e.Key) +
    			": " + aws.StringValue(e.Code) + ": " + aws.StringValue(e.Message))
    	}
    }

    // main lists every object in bucketName whose key starts with prefixOrFolder,
    // page by page, and hands each non-empty page to a pool of 15 workers that
    // delete it in batch. It returns once every queued page has been processed,
    // then reports a listing error if one occurred.
    func main() {
    	sess := session.Must(session.NewSession())
    	svc := s3.New(sess, aws.NewConfig().WithRegion(s3Region))

    	// The buffer lets listing run slightly ahead of the workers.
    	jobs := make(chan []*s3.Object, 16)
    	for w := 1; w <= 15; w++ {
    		go worker(w, svc, jobs)
    	}

    	i := 0
    	// ListObjectsPages calls the function once per page of matching objects.
    	err := svc.ListObjectsPages(&s3.ListObjectsInput{
    		Bucket: aws.String(bucketName),
    		Prefix: aws.String(prefixOrFolder),
    	}, func(p *s3.ListObjectsOutput, last bool) (shouldContinue bool) {
    		fmt.Println("Page,", i)
    		i++
    		// A page without objects has nothing to delete, and an empty batch
    		// delete request is rejected by S3, so do not queue it.
    		if len(p.Contents) == 0 {
    			return true
    		}
    		// Count the page before queuing it so wg.Wait cannot return early.
    		wg.Add(1)
    		jobs <- p.Contents
    		return true
    	})

    	// No more pages will be sent: closing lets the workers drain the queue and
    	// leave their receive loop instead of blocking forever.
    	close(jobs)
    	wg.Wait()

    	if err != nil {
    		fmt.Println("failed to list objects", err)
    	}
    }