Skip to content

Instantly share code, notes, and snippets.

@valda
Created November 2, 2010 01:54
Show Gist options
  • Select an option

  • Save valda/659164 to your computer and use it in GitHub Desktop.

Select an option

Save valda/659164 to your computer and use it in GitHub Desktop.

Revisions

  1. valda created this gist Nov 2, 2010.
    70 changes: 70 additions & 0 deletions gelbooru_downloader.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,70 @@
    #!/usr/bin/ruby

    require 'rubygems'
    require 'active_support'
    require 'mechanize'
    require 'optparse'
    require 'fileutils'

    # Program version. OptionParser falls back to the top-level Version constant
    # when it answers --version, so the name must stay as it is.
    Version = "1.0.0"

    # Command-line settings:
    #   :tag  - tag to search for (required)
    #   :dir  - directory the images are saved into
    #   :page - result offset of the first page to fetch; defaults to 0 so that
    #           nil is never interpolated into the search URL
    options = { :tag => nil, :dir => 'downloads', :page => 0 }
    parser = OptionParser.new do |ps|
      ps.on("-t TAG", "--tag TAG", "fetch tag") { |v| options[:tag] = v }
      ps.on("-d DIR", "--dir DIR", "save dir") { |v| options[:dir] = v }
      # DecimalInteger rejects anything that is not a plain decimal number, so
      # arbitrary text can no longer end up in the query string.
      ps.on("-p NUM", "--pid NUM", OptionParser::DecimalInteger,
            "start page id (gelbooru style: 25,50,75...)") { |v| options[:page] = v }
    end

    # Report malformed command lines (unknown switch, missing or invalid value)
    # with the usage text instead of an uncaught exception.
    begin
      parser.parse!(ARGV)
    rescue OptionParser::ParseError => e
      warn e.message
      puts parser.help
      exit 1
    end

    # A tag is mandatory; a missing or whitespace-only tag prints the usage text.
    # Plain String methods are used so the check does not depend on ActiveSupport
    # core extensions being loaded.
    if options[:tag].nil? || options[:tag].strip.empty?
      puts parser.help
      exit
    end

    # Downloads the original-size images for a Gelbooru tag search.
    #
    # Walks the paginated search results, opens every post linked from a
    # thumbnail and saves the file behind the post's "Original image" link into
    # the download directory. Files that already exist there are skipped.
    class Gelbooru
      # Directory the images are written to.
      attr_reader :download_dir

      # opts - Hash of settings:
      #        :download_dir - target directory (default: '.')
      def initialize(opts = {})
        @agent = Mechanize.new do |a|
          a.user_agent_alias = 'Windows IE 7'
          # Keep a single page of history; presumably to bound memory on long crawls.
          a.max_history = 1
        end
        @base_url = 'http://gelbooru.com/index.php'
        @download_dir = opts[:download_dir] || '.'
      end

      # Downloads every image found by searching for +tag+.
      #
      # tag - search string; CGI-escaped before it is placed in the URL
      # pid - result offset of the first page to fetch; nil is treated as 0
      #
      # Follows the link whose alt attribute is "next" from page to page and
      # returns nil once a page has no such link.
      def fetch_by_tags(tag, pid = 0)
        # An explicit nil (e.g. an unset command-line option) would otherwise
        # produce an empty "pid=" parameter.
        pid ||= 0
        search_url = URI.join(@base_url, "?page=post&s=list&tags=#{CGI.escape(tag)}&pid=#{pid}")
        loop do
          puts "Open: #{search_url}"
          page = @agent.get(search_url)
          page.root.search('//span[@class="thumb"]/a').each do |elem|
            download_post(URI.join(@base_url, elem['href']))
          end
          next_link = page.links.find { |l| 'next' == l.attributes[:alt] }
          break unless next_link
          search_url = URI.join(@base_url, next_link.href)
        end
      end

      private

      # Opens one post page and saves its original image if the page links to one.
      def download_post(image_url)
        puts "Open: #{image_url}"
        image_page = @agent.get(image_url)
        link = image_page.links.find { |l| l.text.match(/^original image$/i) }
        save_original(link) if link
      end

      # Fetches the file behind +link+ into the download directory unless a file
      # with the same base name is already present.
      def save_original(link)
        save_path = File.join(@download_dir, File.basename(link.href))
        if File.exist?(save_path)
          puts "File already exist: #{save_path}"
          return
        end
        puts "Download: #{link.href}"
        orig_image = @agent.get_file(link.href)
        puts "Save to: #{save_path}"
        # Binary mode: image bytes must not pass through newline translation.
        File.open(save_path, 'wb') { |fh| fh << orig_image }
      end
    end

    # Make sure the destination directory is there before any download starts.
    FileUtils.mkpath(options[:dir]) unless File.exist?(options[:dir])

    # Crawl the search results for the requested tag, starting at the requested
    # page offset, saving images into the chosen directory.
    downloader = Gelbooru.new(:download_dir => options[:dir])
    downloader.fetch_by_tags(options[:tag], options[:page])