Created
November 2, 2010 01:54
-
-
Save valda/659164 to your computer and use it in GitHub Desktop.
Revisions
-
valda created this gist
Nov 2, 2010 .There are no files selected for viewing
#!/usr/bin/ruby
#
# Command-line downloader: runs a tag search against gelbooru.com, visits every
# thumbnail on each result page, and saves the file behind each post's
# "Original image" link into a local directory.
#
# Options:
#   -t, --tag TAG   tag to search for (required; help is printed when missing)
#   -d, --dir DIR   directory to save into (default: "downloads")
#   -p, --pid NUM   value of the "pid" query parameter to start from

require 'rubygems'
require 'active_support' # kept from the original script; the blank check below no longer needs it
require 'mechanize'
require 'optparse'
require 'fileutils'
require 'cgi' # CGI.escape is used below; do not rely on another library loading it

# OptionParser looks up the top-level ::Version constant to answer --version,
# so this constant keeps its non-SCREAMING_CASE name on purpose.
Version = "1.0.0"

options = { :tag => nil, :dir => 'downloads' }

parser = OptionParser.new do |ps|
  ps.on("-t TAG", "--tag TAG", "fetch tag") { |v| options[:tag] = v }
  ps.on("-d DIR", "--dir DIR", "save dir") { |v| options[:dir] = v }
  ps.on("-p NUM", "--pid NUM", "start page id (gelbooru style: 25,50,75...)") { |v| options[:page] = v }
end
parser.parse!(ARGV)

# A tag is mandatory. Checked with plain Ruby so the script does not depend on
# ActiveSupport's String#blank? core extension being loaded.
if options[:tag].to_s.strip.empty?
  puts parser.help
  exit
end

# Walks gelbooru.com search results for a tag and downloads original images.
class Gelbooru
  attr_reader :download_dir

  # @param opts [Hash] :download_dir is the directory files are written to
  #   (defaults to the current directory).
  def initialize(opts = {})
    @agent = Mechanize.new do |a|
      a.user_agent_alias = 'Windows IE 7'
      a.max_history = 1
    end
    @base_url = 'http://gelbooru.com/index.php'
    @download_dir = opts[:download_dir] || '.'
  end

  # Fetches every result page for +tag+, starting at offset +pid+, following
  # the link whose alt attribute is "next" until no such link is found.
  #
  # @param tag [String] search tag; URL-escaped before use
  # @param pid [Integer, String, nil] starting "pid" query value; nil is
  #   treated as 0 so callers may pass an unset option straight through
  # @return [nil]
  def fetch_by_tags(tag, pid = 0)
    start_pid = (pid || 0).to_s
    query = "?page=post&s=list&tags=#{CGI.escape(tag)}&pid=#{CGI.escape(start_pid)}"
    search_url = URI.join(@base_url, query)

    loop do
      puts "Open: #{search_url}"
      page = @agent.get(search_url)

      page.root.search('//span[@class="thumb"]/a').each do |elem|
        download_original(URI.join(@base_url, elem['href']))
      end

      next_link = page.links.find { |l| 'next' == l.attributes[:alt] }
      break unless next_link

      search_url = URI.join(@base_url, next_link.href)
    end
  end

  private

  # Opens one post page and saves the target of its "Original image" link,
  # skipping posts without such a link and files that already exist locally.
  def download_original(image_url)
    puts "Open: #{image_url}"
    image_page = @agent.get(image_url)

    link = image_page.links.find { |l| l.text.match(/^original image$/i) }
    return unless link

    save_path = File.join(@download_dir, File.basename(link.href))
    if File.exist?(save_path)
      puts "File already exist: #{save_path}"
      return
    end

    puts "Download: #{link.href}"
    orig_image = @agent.get_file(link.href)
    puts "Save to: #{save_path}"
    # 'wb': image data is binary; text mode may translate line endings and
    # corrupt the file on platforms that distinguish the two modes.
    File.open(save_path, 'wb') { |fh| fh << orig_image }
  end
end

FileUtils.mkpath(options[:dir]) unless File.exist?(options[:dir])

g = Gelbooru.new(:download_dir => options[:dir])
g.fetch_by_tags(options[:tag], options[:page])