require "rexml/document"
require "test/unit"

# Parse an atom:title, atom:summary, atom:content, or atom:rights element and
# return its contents as markup-ready text; i.e., unencode that which is
# encoded, and return the rest as is.
#
# This processing model is described in sections 3.1.1 and 4.1.3 of
# the Atom Syndication Format (RFC 4287).
#
# Dispatch on the element's "type" attribute:
# * "xhtml"                 - serialized children of the single wrapping div
# * "html"                  - the text content with one level of escaping removed
# * "text" or absent        - the serialized children, as is
# * text/*, */xml, *+xml    - the serialized children, as is
# * anything else           - the text content decoded from Base64
#
# @param element [REXML::Element] the Atom text construct or content element
# @return [String, nil] the extracted text, or nil when the element has a
#   "src" attribute (out-of-line content)
# @raise [ArgumentError] when type="xhtml" and the first child element is
#   missing or is not named "div"
def text(element)
  return nil if element.attribute("src")

  type = element.attribute("type")
  case type && type.value
  when "xhtml"
    div = element.elements[1]
    raise ArgumentError, "missing xhtml:div" unless div && div.name == "div"

    # Array#to_s is the inspect form on Ruby >= 1.9, so serialize each child
    # node explicitly and concatenate the results.
    div.to_a.map(&:to_s).join.strip
  when "html"
    # Element#text is nil for empty content; to_s keeps that case from raising.
    element.text.to_s.strip
  when "text", nil, %r{\Atext/}i, /\+xml\z/i, %r{/xml\z}i
    element.to_a.map(&:to_s).join.strip
  else
    # The "m" unpack directive is the Base64 decoder that Base64.decode64
    # wraps; using it directly avoids loading the base64 library.
    element.text.to_s.gsub(/\s+/, "").unpack("m").first
  end
end

# TestText provides unit test cases for the text function described above.
#
# One parse method is defined to take care of the small bit of administrivia
# necessary. The remainder are the actual test cases, and all begin with the
# string "test_". Each invokes the parse method with a given feed, and makes
# an assertion on what the expected results are.
class TestText < Test::Unit::TestCase
  # The parse method takes care of the following:
  # * parsing the feed as XML
  # * finding the first content element
  # * extracting the text from it
  def parse(feed)
    doc = REXML::Document.new(feed)
    ns = { "atom" => "http://www.w3.org/2005/Atom" }
    content = REXML::XPath.first(doc, "//atom:content", ns)
    text(content)
  end

  # test content with no type attribute specified (default="text")
  def test_default
    assert_equal("AT&amp;T bought by SBC!", parse(<<-END))
      <entry xmlns="http://www.w3.org/2005/Atom">
        <content>AT&amp;T bought by SBC!</content>
      </entry>
    END
  end

  # test content with type="text"
  def test_text
    assert_equal("AT&amp;T bought by SBC!", parse(<<-END))
      <entry xmlns="http://www.w3.org/2005/Atom">
        <content type="text">AT&amp;T bought by SBC!</content>
      </entry>
    END
  end

  # test content with type="html"
  def test_html
    assert_equal("AT&amp;T bought <b>by SBC</b>!", parse(<<-END))
      <entry xmlns="http://www.w3.org/2005/Atom">
        <content type="html">AT&amp;amp;T bought &lt;b&gt;by SBC&lt;/b&gt;!</content>
      </entry>
    END
  end

  # test content with type="xhtml"
  def test_xhtml
    assert_equal("AT&amp;T bought <b>by SBC</b>!", parse(<<-END))
      <entry xmlns="http://www.w3.org/2005/Atom">
        <content type="xhtml">
          <div xmlns="http://www.w3.org/1999/xhtml">
            AT&amp;T bought <b>by SBC</b>!
          </div>
        </content>
      </entry>
    END
  end

  # test content with an xml mime type
  def test_xml
    assert_match(/Codezoo Crawler<\/name>/, parse(<<-END))
      <entry xmlns="http://www.w3.org/2005/Atom">
        <content type="application/xml">
          <author>
            <name>Codezoo Crawler</name>
          </author>
        </content>
      </entry>
    END
  end

  # test content with a non-xml and non-text mime type
  def test_base64
    assert_equal("Send reinforcements", parse(<<-END))
      <entry xmlns="http://www.w3.org/2005/Atom">
        <content type="application/octet-stream">
          U2VuZCByZWluZm9yY2VtZW50cw==
        </content>
      </entry>
    END
  end

  # test out-of-line content
  def test_src
    assert_nil(parse(<<-END))
      <entry xmlns="http://www.w3.org/2005/Atom">
        <content type="image/png" src="http://example.org/image.png"/>
      </entry>
    END
  end
end