import re, sys
from xml.sax.saxutils import escape, unescape
from urllib import quote, urlopen
sys.path.insert(0,'/home/rubys/svn/feedvalidator/src')
import feedvalidator
from feedvalidator.logging import *
# Keep a handle on the interpreter's original stdout: head() rebinds
# sys.stdout to the report file, row() writes progress through this handle,
# and foot() restores sys.stdout from it.
stdout = sys.stdout
# head(name): start a new report page.
#
# Prints a "<name>..." progress line to the current stdout, then rebinds
# sys.stdout to a freshly opened file whose name is `name` lower-cased with
# spaces removed plus ".html", so every later `print` lands in that file
# until foot() restores sys.stdout.  It then prints the page title, the
# heading and the column labels (Feed, Version, Valid?, Conforms?).
#
# NOTE(review): indentation is missing and many string literals below are
# empty or split across two lines; presumably the HTML markup they held was
# lost from this copy of the file -- confirm against the original before
# editing any of these print statements.
def head(name):
# Progress message; written before the redirect on the next line.
print "\n%s..." % name
# From here on, print output goes to the per-report HTML file.
sys.stdout = open('%s.html' % name.lower().replace(' ',''), 'w')
print ''
print '
'
print 'RSS Profile Conformance: %s' % name
print ''
print ''
print ''
print ''
print ''
print 'RSS Profile Conformance
'
print '%s
' % name
print ''
print ''
# Column labels for the results table.
print ' | '
print ' Feed | '
print ' Version | '
print ' Valid? | '
print ' Conforms? † | '
print '
'
# row(title, link): validate one feed and print its line of the report.
#
# Writes the (unescaped) title to the saved real stdout as progress, runs
# feedvalidator.validateURL(link), and derives three 'Yes'/'No' flags from
# the logged events:
#   valid   -- 'No' if any event is an Error
#   clean   -- 'No' if any event is an Error or a Warning
#   profile -- 'No' if an event is an instance of one of the listed
#              profile-check classes; forced to 'N/A' unless feedType == 2
# It then prints one of several row-opening strings chosen from those flags,
# followed by the feed, version, valid and conforms cells.
#
# NOTE(review): indentation is missing and most string literals are empty
# or split across lines; presumably HTML markup was lost from this copy --
# confirm against the original.
def row(title, link):
# NOTE(review): the entity mapping passed to unescape() looks damaged (the
# keys read as bare quote characters); presumably they were entity names
# such as &apos; / &quot; -- TODO confirm.
stdout.write(" %s\n" % unescape(title, {"'":"'", """:'"'}))
try:
results = feedvalidator.validateURL(link)
# Bare except: any failure while validating is reported as a single generic
# Error on a feed of unknown type, so the row is still emitted.
except:
results = {'feedType':'?','loggedEvents':[Error({})]}
profile = 'Yes'
valid = 'Yes'
clean = 'Yes'
for event in results['loggedEvents']:
if isinstance(event, Error):
clean = valid = 'No'
if isinstance(event, Warning):
clean = 'No'
# Event classes that count against profile conformance.
for profileCheck in (ContainsHTML, CharacterData, DuplicateEnclosure,
ContainsRelRef, MissingRealName, MisplacedItem, ImageTitleDoesntMatch,
AvoidTextInput, NeedDescriptionBeforeContent, SlashDate,
MissingAtomSelfLink):
if isinstance(event, profileCheck): profile = 'No'
# Numeric feedType codes mapped to display names; anything else (including
# the '?' fallback above) is shown as 'Unknown'.
types = {1:"RSS 1.0", 2:"RSS 2.0", 3:"Atom"}
feedtype = types.get(results['feedType'], 'Unknown')
# Conformance is only reported for feedType 2 (RSS 2.0).
if not results['feedType'] == 2: profile='N/A'
print
# Pick the row-opening string from feed type, validity, cleanliness and
# profile conformance.  NOTE(review): the branches now print identical or
# empty text; presumably each carried distinct markup -- TODO confirm.
if feedtype != 'RSS 2.0':
if valid == 'No':
print ''
elif clean == 'Yes':
print '
'
else:
print '
'
elif profile == 'No':
if valid == 'No':
print '
'
else:
print '
'
elif valid == 'No':
print '
'
elif clean == 'Yes':
print '
'
else:
print '
'
# NOTE(review): as written, the next two statements apply % to format
# strings whose placeholder count does not match their arguments (none for
# quote(link); one %s for the (link, title) pair).  Presumably the stripped
# markup contained the missing placeholders -- TODO confirm.
print (' ' % quote(link) +
' | ')
print ' %s | ' % (link, title)
print ' %s | ' % feedtype
print ' %s | ' % valid
print ' %s | ' % profile
print '
'
# foot(): finish the current report page.
#
# Prints the closing strings, closes the object currently bound to
# sys.stdout (the file opened by head()), and rebinds sys.stdout to the
# saved original stream.
#
# NOTE(review): the string literals below are empty or split across lines;
# presumably closing HTML markup was lost from this copy -- confirm against
# the original.
def foot():
print '
'
print ''
print ''
print ''
print ''
# Close the report file, then put the real stdout back.
sys.stdout.close()
sys.stdout = stdout
# Script entry point: fetch the blogroll page, collapse every whitespace run
# to a single space, and build the 'RSS Blogroll' report from links found
# in it.
if __name__ == '__main__':
blogroll = re.sub("\s+", " ", urlopen('http://www.therssweblog.com/').read())
head('RSS Blogroll')
# NOTE(review): the next three lines look truncated -- the first pattern is
# empty, the second statement is an assignment ending in a colon, and `lb`,
# `title` and `link` are not defined anywhere visible.  Presumably this was
# a nested loop over (link, title) pairs extracted from each matched
# section -- TODO restore from the original source.
for div in re.findall('', blogroll):
hrefs = re.findall('href="(.*?)">(.*?)',lb):
row(title, link)
foot()