"""Generate an HTML report of RSS Profile conformance for a blogroll.

The script fetches a blogroll page, validates every linked feed with
``feedvalidator`` and writes one table row per feed to ``<name>.html``,
recording the feed type, whether it is valid, and whether it conforms to
the RSS Profile checks listed in ``PROFILE_CHECKS``.

NOTE(review): this file reached review with the HTML markup stripped out
of its string literals and part of the ``__main__`` section missing.  The
control flow, the visible text and the five-column table shape were
recoverable; tag names, CSS class names, the validator link and the
``<div>`` selector are reconstructions and must be checked against the
original revision before this is relied on.
"""

import re
import sys
from xml.sax.saxutils import escape, unescape

try:
    from urllib import quote, urlopen
except ImportError:  # Python 3 moved these names
    from urllib.parse import quote
    from urllib.request import urlopen

sys.path.insert(0, '/home/rubys/svn/feedvalidator/src')
import feedvalidator
from feedvalidator.logging import *

# The real console stream; ``head`` redirects ``sys.stdout`` to the report
# file and ``foot`` restores it from here.
stdout = sys.stdout

# Logged events that make a feed fail RSS Profile conformance.
PROFILE_CHECKS = (
    ContainsHTML, CharacterData, DuplicateEnclosure, ContainsRelRef,
    MissingRealName, MisplacedItem, ImageTitleDoesntMatch, AvoidTextInput,
    NeedDescriptionBeforeContent, SlashDate, MissingAtomSelfLink,
)

# Maps the validator's numeric ``feedType`` to a display name.
FEED_TYPES = {1: "RSS 1.0", 2: "RSS 2.0", 3: "Atom"}

BLOGROLL_URL = 'http://www.therssweblog.com/'

# NOTE(review): assumed validator endpoint -- only ``% quote(link) +``
# survived from the original expression.
VALIDATOR_URL = 'http://feedvalidator.org/check.cgi?url=%s'


def head(name):
    """Start a report called *name*.

    Announces the report on the current stdout, then redirects
    ``sys.stdout`` to ``<name lowercased, spaces removed>.html`` and writes
    the page header and the table's heading row.  ``foot`` must be called
    afterwards to close the file and restore stdout.
    """
    print("\n%s..." % name)
    sys.stdout = open('%s.html' % name.lower().replace(' ', ''), 'w')

    # NOTE(review): markup and stylesheet below are reconstructed; the
    # class names pair with ``_row_class``.
    print('<html>')
    print('<head>')
    print('<title>RSS Profile Conformance: %s</title>' % escape(name))
    print('<style type="text/css">')
    print('  tr.other-invalid, tr.nonconforming-invalid, tr.invalid '
          '{ background: #fcc }')
    print('  tr.nonconforming { background: #fdb }')
    print('  tr.other-warnings, tr.warnings { background: #ffc }')
    print('  tr.other-clean, tr.clean { background: #cfc }')
    print('</style>')
    print('</head>')
    print('<body>')
    print('<h1>RSS Profile Conformance</h1>')
    print('<h2>%s</h2>' % escape(name))
    print('<table>')
    print('<tr>')
    print('  <th></th>')
    print('  <th>Feed</th>')
    print('  <th>Version</th>')
    print('  <th>Valid?</th>')
    print('  <th>Conforms?</th>')
    print('</tr>')


def _row_class(feedtype, valid, clean, profile):
    """Return the CSS class for a row; branch order follows the original.

    NOTE(review): the class names themselves are invented -- the original
    ``<tr ...>`` literals were lost.
    """
    if feedtype != 'RSS 2.0':
        if valid == 'No':
            return 'other-invalid'
        if clean == 'Yes':
            return 'other-clean'
        return 'other-warnings'
    if profile == 'No':
        if valid == 'No':
            return 'nonconforming-invalid'
        return 'nonconforming'
    if valid == 'No':
        return 'invalid'
    if clean == 'Yes':
        return 'clean'
    return 'warnings'


def row(title, link):
    """Validate the feed at *link* and print one table row for it.

    *title* is the link text as scraped from the blogroll (still
    HTML-escaped); it is unescaped only for the console progress line and
    written to the report as-is.
    """
    # Progress goes to the real console, not the redirected report file.
    stdout.write(" %s\n" % unescape(title, {"&apos;": "'", "&quot;": '"'}))

    try:
        results = feedvalidator.validateURL(link)
    except Exception:
        # Any validator failure is reported as a single generic error so
        # one bad feed does not abort the run.  ``Exception`` rather than a
        # bare ``except`` so that Ctrl-C still stops the script.
        results = {'feedType': '?', 'loggedEvents': [Error({})]}

    profile = 'Yes'
    valid = 'Yes'
    clean = 'Yes'
    for event in results['loggedEvents']:
        if isinstance(event, Error):
            clean = valid = 'No'
        if isinstance(event, Warning):
            clean = 'No'
        if isinstance(event, PROFILE_CHECKS):
            profile = 'No'

    feedtype = FEED_TYPES.get(results['feedType'], 'Unknown')
    # The profile only applies to RSS 2.0 feeds.
    if results['feedType'] != 2:
        profile = 'N/A'

    print('')
    print('<tr class="%s">' % _row_class(feedtype, valid, clean, profile))
    # NOTE(review): the original first cell linked to the validator and
    # apparently contained no text (probably an image); link text assumed.
    print('  <td><a href="' + VALIDATOR_URL % quote(link) +
          '">validate</a></td>')
    print('  <td><a href="%s">%s</a></td>' % (link, title))
    print('  <td>%s</td>' % feedtype)
    print('  <td>%s</td>' % valid)
    print('  <td>%s</td>' % profile)
    print('</tr>')


def foot():
    """Finish the report: close the table and page, restore stdout."""
    print('</table>')
    print('<p>')
    # ``&dagger;`` rather than a raw non-ASCII character: the source has no
    # encoding declaration.
    # NOTE(review): where the dagger marker is attached in the table could
    # not be recovered.
    print('&dagger; Indicates that one or more of the following '
          'conditions are present in the feed:')
    print('</p>')
    # NOTE(review): the original list items were lost; listing the profile
    # check names is a stand-in.
    print('<ul>')
    for check in PROFILE_CHECKS:
        print('  <li>%s</li>' % check.__name__)
    print('</ul>')
    print('</body>')
    print('</html>')
    sys.stdout.close()
    sys.stdout = stdout


if __name__ == '__main__':
    response = urlopen(BLOGROLL_URL)
    try:
        page = response.read()
    finally:
        response.close()
    if not isinstance(page, str):
        # Python 3 returns bytes; Python 2 already yields ``str``.
        page = page.decode('utf-8', 'replace')
    # Collapse whitespace so the regexes below can match across lines.
    blogroll = re.sub(r"\s+", " ", page)

    head('RSS Blogroll')
    try:
        # NOTE(review): the <div> selector and the code between the anchor
        # regex and ``row(title, link)`` were lost; this walks every anchor
        # in every <div>, which may be broader than the original.
        for div in re.findall(r'<div[^>]*>(.*?)</div>', blogroll):
            for link, title in re.findall(r'href="(.*?)">(.*?)</a>', div):
                row(title, link)
    finally:
        # Always close the report and hand stdout back, even if a row fails.
        foot()