Skip to content

Instantly share code, notes, and snippets.

@warmbreeze
Forked from abstractart/books.md
Created December 23, 2020 08:33
Show Gist options
  • Select an option

  • Save warmbreeze/4f5cd64f7d1918d87fae1ee697eaca2e to your computer and use it in GitHub Desktop.

Select an option

Save warmbreeze/4f5cd64f7d1918d87fae1ee697eaca2e to your computer and use it in GitHub Desktop.
Free Programming Ebooks - O'Reilly Media. Codeship free ebooks here - https://bit.ly/2oQ0knQ

This script generates download links for the free books on the O'Reilly site (http://www.oreilly.com/programming/free).

Requirements

ruby

httparty, nokogiri (gem install httparty nokogiri --no-document)

Execute

ruby script.rb > books.md

require 'httparty' require 'nokogiri' require 'uri'

# Static configuration for the O'Reilly free-ebook pages this script crawls.
module OReillySite
  # Base address that every theme and download URL is built from.
  URL = 'http://www.oreilly.com/'.freeze
  # Path segments of the topic pages whose free-book listings are crawled.
  THEMES = %w[programming iot data webops-perf web-platform security business].freeze
  # File extensions for which a download link is generated per book.
  FORMATS = %w[pdf epub mobi].freeze
end

# Assembles the listing-page and file-download URLs for the O'Reilly site.
module OReillySite::URLBuilder
  # Returns the address of the free-books listing page for +theme+:
  # the site base URL, the theme, then "/free/".
  def self.theme_url(theme)
    OReillySite::URL + theme + '/' + 'free/'
  end

  # Returns the download address of one book file:
  # the theme's listing URL followed by "files/<book_filename>.<format>".
  def self.download_url(theme, book_filename, format)
    theme_url(theme) + 'files/' + book_filename + '.' + format
  end
end

# Fetches the free-book listing pages and extracts book metadata per theme.
module OReillySite
  module Crawler
    # Value object for one listed book. A plain Struct is used so the script
    # does not depend on OpenStruct, which the file never requires.
    Book = Struct.new(:theme, :title, :file_name)

    # Fetches the listing page of every configured theme.
    #
    # @return [Hash{String => Array<Book>}] books keyed by theme name; looking
    #   up an unknown key stores and returns an empty array.
    def self.library
      books = Hash.new { |hash, key| hash[key] = [] }

      OReillySite::THEMES.each do |theme|
        books[theme] = theme_books(OReillySite::URLBuilder.theme_url(theme))
      end

      books
    end

    # Downloads one listing page and converts each product link into a Book.
    # Links that cannot be interpreted are dropped instead of aborting the crawl.
    #
    # @param theme_url [String] address of the listing page
    # @return [Array<Book>]
    def self.theme_books(theme_url)
      Nokogiri::HTML(HTTParty.get(theme_url).body)
        .css('section .product-row a')
        .map { |link| get_book_info(link) }
        .compact
    end

    # Builds a Book from a link element.
    #
    # The theme is the first path segment of the link's href, and the file name
    # is the last path segment up to its first dot. When the link has no title
    # attribute, the file name is used as the title.
    #
    # @param link [#[]] object answering ['href'] and ['title']
    # @return [Book, nil] nil when the href is missing, empty or not a valid
    #   URI, or when no theme/file name can be derived from it
    def self.get_book_info(link)
      href = link['href']
      return nil if href.nil? || href.empty?

      segments = URI(href).path.to_s.split('/')
      theme = segments[1]
      file_name = segments.last.to_s.split('.').first
      return nil unless theme && file_name

      Book.new(theme, link['title'] || file_name, file_name)
    rescue URI::InvalidURIError
      # A single malformed link should not stop the whole run.
      nil
    end

    # A bare `private` does not apply to `def self.` methods, so the helpers
    # are hidden explicitly.
    private_class_method :theme_books, :get_book_info
  end
end

# Renders the crawled library as a Markdown document.
#
# The output is a title line, a "Categories" list with one anchor link per
# theme, and then one section per theme containing a heading for every book
# followed by its download links (one per OReillySite::FORMATS entry).
#
# @param library [Hash] theme name => list of books responding to
#   #title, #theme and #file_name
# @return [String] the Markdown text
def markdown(library)
  main_header = "# Free Programming Ebooks - O'Reilly Media \n"
  head_of_contents = ['## Categories']
  theme_sections = []

  library.each do |theme, books|
    capitalized_theme = theme.capitalize

    # The anchor uses the raw theme name; the visible label is capitalized.
    head_of_contents << "- [#{capitalized_theme}](##{theme})"

    section_header = "## #{capitalized_theme} \n"

    section_books = books.map do |book|
      book_title = "### #{book.title}"

      links = OReillySite::FORMATS.map do |fmt|
        "[#{fmt}](#{OReillySite::URLBuilder.download_url(book.theme, book.file_name, fmt)})"
      end.join(' ')

      [book_title, links].join("\n")
    end.join("\n")

    theme_sections << [section_header, section_books].join("\n")
  end

  # Array#join flattens the nested sections array using the same separator.
  [main_header, head_of_contents.join("\n"), theme_sections].join("\n")
end

# Script entry point: crawls the site, renders the result as Markdown and
# prints it to standard output.
def main
  puts markdown(OReillySite::Crawler.library)
end

main

# This script generates download links for the free books on the O'Reilly site (http://www.oreilly.com/programming/free).
# Requirements
# ruby
# httparty, nokogiri (gem install httparty nokogiri --no-document)
# Execute
# ruby script.rb > books.md
require 'httparty'
require 'nokogiri'
require 'uri'
# Static configuration for the O'Reilly free-ebook pages this script crawls.
module OReillySite
  # Base address that every theme and download URL is built from.
  URL = 'http://www.oreilly.com/'.freeze
  # Path segments of the topic pages whose free-book listings are crawled.
  THEMES = %w[programming iot data webops-perf web-platform security business].freeze
  # File extensions for which a download link is generated per book.
  FORMATS = %w[pdf epub mobi].freeze
end
# Assembles the listing-page and file-download URLs for the O'Reilly site.
module OReillySite::URLBuilder
  class << self
    # Returns the address of the free-books listing page for +theme+:
    # the site base URL, the theme, then "/free/".
    def theme_url(theme)
      themed_root = OReillySite::URL + theme
      themed_root + '/free/'
    end

    # Returns the download address of one book file:
    # the theme's listing URL followed by "files/<book_filename>.<format>".
    def download_url(theme, book_filename, format)
      files_root = theme_url(theme) + 'files/'
      files_root + book_filename + '.' + format
    end
  end
end
# Fetches the free-book listing pages and extracts book metadata per theme.
module OReillySite
  module Crawler
    # Value object for one listed book. A plain Struct is used so the script
    # does not depend on OpenStruct, which the file never requires.
    Book = Struct.new(:theme, :title, :file_name)

    # Fetches the listing page of every configured theme.
    #
    # @return [Hash{String => Array<Book>}] books keyed by theme name; looking
    #   up an unknown key stores and returns an empty array.
    def self.library
      books = Hash.new { |hash, key| hash[key] = [] }

      OReillySite::THEMES.each do |theme|
        books[theme] = theme_books(OReillySite::URLBuilder.theme_url(theme))
      end

      books
    end

    # Downloads one listing page and converts each product link into a Book.
    # Links that cannot be interpreted are dropped instead of aborting the crawl.
    #
    # @param theme_url [String] address of the listing page
    # @return [Array<Book>]
    def self.theme_books(theme_url)
      Nokogiri::HTML(HTTParty.get(theme_url).body)
        .css('section .product-row a')
        .map { |link| get_book_info(link) }
        .compact
    end

    # Builds a Book from a link element.
    #
    # The theme is the first path segment of the link's href, and the file name
    # is the last path segment up to its first dot. When the link has no title
    # attribute, the file name is used as the title.
    #
    # @param link [#[]] object answering ['href'] and ['title']
    # @return [Book, nil] nil when the href is missing, empty or not a valid
    #   URI, or when no theme/file name can be derived from it
    def self.get_book_info(link)
      href = link['href']
      return nil if href.nil? || href.empty?

      segments = URI(href).path.to_s.split('/')
      theme = segments[1]
      file_name = segments.last.to_s.split('.').first
      return nil unless theme && file_name

      Book.new(theme, link['title'] || file_name, file_name)
    rescue URI::InvalidURIError
      # A single malformed link should not stop the whole run.
      nil
    end

    # A bare `private` does not apply to `def self.` methods, so the helpers
    # are hidden explicitly.
    private_class_method :theme_books, :get_book_info
  end
end
# Renders the crawled library as a Markdown document.
#
# The output is a title line, a "Categories" list with one anchor link per
# theme, and then one section per theme containing a heading for every book
# followed by its download links (one per OReillySite::FORMATS entry).
#
# @param library [Hash] theme name => list of books responding to
#   #title, #theme and #file_name
# @return [String] the Markdown text
def markdown(library)
  toc_lines = ["## Categories"]

  sections = library.map do |theme, books|
    heading = theme.capitalize
    # The anchor uses the raw theme name; the visible label is capitalized.
    toc_lines.push("- [#{heading}](##{theme})")

    entries = books.map do |book|
      title_line = "### #{book.title}"
      download_links = OReillySite::FORMATS.map do |fmt|
        target = OReillySite::URLBuilder.download_url(book.theme, book.file_name, fmt)
        "[#{fmt}](#{target})"
      end
      "#{title_line}\n#{download_links.join(" ")}"
    end

    "## #{heading} \n\n#{entries.join("\n")}"
  end

  # Array#join flattens the nested sections array using the same separator.
  ["# Free Programming Ebooks - O'Reilly Media \n", toc_lines.join("\n"), sections].join("\n")
end
# Script entry point: crawls the site, renders the result as Markdown and
# prints it to standard output.
def main
  catalogue = OReillySite::Crawler.library
  document = markdown(catalogue)
  puts document
end

main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment