Skip to content

Instantly share code, notes, and snippets.

@planbnet
Created January 12, 2010 20:26
Show Gist options
  • Select an option

  • Save planbnet/275574 to your computer and use it in GitHub Desktop.

Select an option

Save planbnet/275574 to your computer and use it in GitHub Desktop.

Revisions

  1. planbnet created this gist Jan 12, 2010.
    76 changes: 76 additions & 0 deletions gistfile1.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,76 @@
    #!/usr/bin/ruby

    require 'rubygems'
    require 'nokogiri'
    require 'open-uri'
    require 'rss/maker'
    require 'syndication/atom'
    require 'syndication/rss'
    require 'syndication/content'

    # Upstream Atom feed that gets converted into a full-text RSS feed.
    url = "http://www.heise.de/newsticker/heise-atom.xml"

    # The output file sits in the same directory as this script.
    path = File.expand_path("..", __FILE__)
    filename = path + "/heise.rss"

    # Download the upstream Atom feed and parse it.
    # URI.open (provided by open-uri) is used instead of a bare Kernel#open:
    # since Ruby 3.0 Kernel#open no longer handles URLs, and before that it
    # would also run a string starting with "|" as a shell command.
    xml = URI.open(url) { |http| http.read }
    feed = Syndication::Atom::Parser.new.parse(xml)

    # Load the item descriptions stored in the previously written RSS file,
    # keyed by item link, so they can be reused instead of scraped again.
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    cacheditems = {}
    if File.exist?(filename)
      # File.read only ever reads a file (Kernel#open can also spawn a
      # subprocess for "|..." arguments) and closes the handle by itself.
      cache = Syndication::RSS::Parser.new.parse(File.read(filename))
      cache.items.each { |item| cacheditems[item.link] = item.description }
    end

    # Fetches an article page and returns the HTML of its body as a String,
    # or nil when the page has no div with class "meldung_wrapper".
    #
    # link - absolute URL of the article page (String).
    #
    # Download errors raised by URI.open are not rescued here.
    def scrape_article(link)
      # URI.open instead of Kernel#open: the latter stopped handling URLs in
      # Ruby 3.0.
      html = URI.open(link) { |http| http.read }
      article = Nokogiri::HTML(html).at("//div[@class='meldung_wrapper']")
      return nil unless article

      # Strip every nested <div> from the article body.
      article.search("div").each { |div| div.remove }

      # Make site-relative image paths absolute. Images without a src
      # attribute and protocol-relative sources ("//host/...") are left
      # untouched; prefixing the latter would produce a broken URL.
      article.search("img").each do |img|
        src = img['src']
        img['src'] = src.sub(%r{\A/(?!/)}, 'http://www.heise.de/') if src
      end

      article.to_s
    end

    # Build the RSS 2.0 feed: channel metadata is copied from the Atom feed,
    # and each entry gets its description from the cache or from a scrape.
    content = RSS::Maker.make("2.0") do |m|
      m.channel.title = feed.title.txt
      m.channel.link = feed.links.first.href
      m.channel.about = "http://MYDOMAIN/heise.rss"
      m.channel.language = "de_DE"
      m.channel.description = feed.subtitle.txt
      m.items.do_sort = true

      feed.entries.each do |entry|
        item = m.items.new_item

        item.title = entry.title.txt
        item.link = entry.links.first.href
        item.date = Time.parse(entry.updated.to_s)
        item.guid.content = entry.id

        # Only hit the network for articles that are not cached yet. The
        # scraped markup is kept in its own local so that it no longer
        # overwrites the outer `content` variable while the feed is built.
        description = cacheditems[item.link] || scrape_article(item.link)

        # When the page layout did not match, leave the description unset
        # rather than aborting the whole run with a NoMethodError.
        item.description = description if description
      end
    end

    # Write the generated feed to disk, replacing any previous version.
    File.open(filename, "w") { |out| out << content }

    # Debugging aid: uncomment to print the file that was just written.
    #puts File.read(filename)