require 'rexml/document' require 'html5/liberalxmlparser' module Planet module XmlParser begin require 'xml/parser' # http://www.yoshidam.net/xmlparser_en.txt @@parser = :expat rescue LoadError begin require 'xml/libxml' # http://libxml.rubyforge.org/ @@parser = :libxml2 rescue LoadError @@parser = :rexml end end def XmlParser.parse source source = source.read if source.respond_to? :read begin case @@parser when :expat # fast, compliant, but not always installed doc = XmlParser.expat source when :libxml2 # also fast, compliant, but not always installed doc = XmlParser.libxml2 source else # fairly fast, fairly compliant, always available doc = REXML::Document.new source end bozo = false rescue # If everything is being bozo'd, enable this to see why. # print "PARSE ERROR: #{$!}\n #{$!.backtrace.join("\n ")}\n" # last ditch attempt: use a liberal XML parser parser = HTML5::XMLParser.new doc = REXML::Document.new parser.parse_fragment(source).each {|node| doc << node rescue nil} bozo = true end # augment the document with feed parser attributes source = nil class << doc attr_accessor :bozo end doc.bozo = bozo doc end def XmlParser.expat source parser = XML::Parser.new class <