# frozen_string_literal: true

# Interactive scraper: asks for a search term and a page count, scrapes
# recipe cards (title, description, image URL) from bbcgoodfood.com search
# results via a real Chrome instance (Ferrum), and writes them to a CSV.
require 'ferrum'
require 'open-uri'
require 'pry'
require 'csv'

puts "What recipes are we looking for?"
query = gets.chomp
puts "How many pages of results do we need? (sorted by relevance)"
max_pages = gets.chomp.to_i

puts 'loading the magic'
3.times { puts '' }

# Dismisses the site's GDPR/cookie consent dialog if one is present.
# The JS optional-chaining `?.` makes this a no-op when the button is absent.
def click_away_gdpr_popup(browser)
  expr = <<~JS
    document.querySelector('button[mode="primary"]')?.click()
  JS
  browser.execute expr
end

# Waits a random sub-second-to-1.5s interval and scrolls to a random offset
# so the traffic pattern looks less bot-like.
def move_like_a_human(browser)
  sleep rand(0.5..1.5)
  expr = <<~JS
    window.scrollTo(0, #{rand(400..1200)})
  JS
  browser.execute expr
end

browser_options = { timeout: 10, process_timeout: 10, headless: false }
scraped_cards = []

# BUG FIX: the raw query must be URL-encoded before interpolation — spaces
# or special characters in the user's input would otherwise break the URL.
# (URI is available: `open-uri` requires the `uri` stdlib.)
encoded_query = URI.encode_www_form_component(query)

page = 1
while page <= max_pages
  puts "handling page #{page}"
  browser = Ferrum::Browser.new browser_options
  begin
    browser.goto "https://www.bbcgoodfood.com/search/recipes/page/#{page}/?q=#{encoded_query}&sort=-relevance"
    click_away_gdpr_popup(browser)
    move_like_a_human(browser)

    # BUG FIX: `retries ||= 0` persisted across pages (top-level local), so
    # once one page consumed its retries, later pages never retried at all.
    retries = 0
    begin
      puts "trying to find recipes => retries #{retries}"
      browser.at_css('.standard-card-new--skinny')
    rescue
      browser.refresh
      move_like_a_human(browser)
      sleep 5
      retry if (retries += 1) < 3
    end

    cards = browser.css('.standard-card-new--skinny')
    if cards.count.zero?
      puts 'no more recipes! Bye now'
      break
    end

    cards.each do |card|
      # Safe navigation: a malformed card missing one sub-element should not
      # crash the whole run — record nil for the missing field instead.
      title = card.at_css('.standard-card-new__display-title')&.inner_text
      descr = card.at_css('.standard-card-new__description')&.inner_text
      img_url = card.at_css('.img-container__image')&.attribute('src')
      scraped_cards << [title, descr, img_url]
    end

    2.times { puts '' }
    puts 'scraped_cards.count'
    puts scraped_cards.count
    page += 1
  ensure
    # BUG FIX: the original only quit the browser at the bottom of the loop
    # body, so the `break` path above (and any escaping exception) leaked a
    # running Chrome process. `ensure` closes it on every path.
    browser.quit
  end
end

puts 'making that csv now'

# BUG FIX: a path separator in the query would make file creation fail, so
# strip `/` and `\` out of the filename (the query itself is unchanged).
safe_query = query.gsub(%r{[/\\]}, '_')
csv_title = "recipes for #{safe_query} #{Time.now.strftime('%F %H_%M')}.csv"
CSV.open(csv_title, "w") do |csv|
  csv << ["Title", "Description", "Image Url"]
  scraped_cards.each do |scraped_card|
    csv << scraped_card
  end
end

puts "#{csv_title} created! Look for it in the same folder!"
3.times { puts '' }
puts 'magic happened'