import re, sgmllib, sys, time, urllib
from glob import glob
from xml.sax.saxutils import escape
import extractor
# Permalinks that accept pingbacks; group 1 captures the run of digits
# between "/blog/" and ".html".  Raw string so "\d" reaches the regex engine
# untouched, and the dots are escaped so they match only a literal ".".
PINGABLE = re.compile(r'http://www\.intertwingly\.net/blog/(\d+)\.html')
class parser(sgmllib.SGMLParser):
""" extract title and hrefs from a web page"""
intitle=0
title = ""
hrefs = []
def do_a(self, attrs):
attrs=dict(attrs)
if attrs.has_key('href'): self.hrefs.append(attrs['href'])
def do_title(self, attrs):
if self.title=="": self.intitle=1
def unknown_starttag(self, tag, attrs):
self.intitle=0
def unknown_endtag(self,tag):
self.intitle=0
def handle_charref(self, ref):
if self.intitle: self.title = self.title + ("%s;" % ref)
def handle_data(self,text):
if self.intitle: self.title = self.title + text
def excerpt(source):
    """Return ``(title, summary)`` for the feed entry that links to *source*.

    *source* with any ``#fragment`` removed is handed to ``extractor.html``
    to obtain the feed.  The feed's ``entry`` elements (or its ``item``
    elements when there are no entries) are scanned for the first one
    whose alternate link equals *source*.  That entry's summary, or its
    content when the summary is empty, has tags removed and whitespace
    collapsed, and is cut back to at most 250 characters, preferably at a
    word boundary.

    Returns ``('', '')`` when no entry matches.
    """
    # Imported locally: the module-level imports do not bring in minidom,
    # so the bare name would raise NameError.
    from xml.dom import minidom

    limit = 250
    any_tag = re.compile(r'<.*?>', re.S)

    baseurl = source.split("#")[0]
    # NOTE(review): this value goes straight into parseString() and is also
    # passed to extractor.extract(); presumably it holds the feed document
    # itself rather than an address -- confirm against extractor.
    feedurl = extractor.html(baseurl).feed
    feed = minidom.parseString(feedurl)

    entries = feed.getElementsByTagName('entry')
    entries = entries or feed.getElementsByTagName('item')

    for entry in entries:
        (title, alternate, summary, content) = extractor.extract(entry, feedurl)
        if alternate != source:
            continue

        # Fall back to '' so an entry with neither field cannot crash re.sub.
        text = summary or content or ''
        text = any_tag.sub('', text)
        text = re.sub(r'\s+', ' ', text)

        # Trim only text that is actually too long.  Slicing at rfind()
        # unconditionally would drop the last word of a short summary, or
        # its last character when rfind() returns -1.
        if len(text) > limit:
            cut = text.rfind(' ', 0, limit)
            if cut > 0:
                text = text[:cut]
            else:
                text = text[:limit]
        return (title, text)

    return ('', '')
def ping(source, target):
    """Process one pingback claiming that page *source* links to *target*.

    The page at *source* is fetched and must contain an anchor whose href
    equals *target*; *target* must match ``PINGABLE``; and no backlink from
    *source* may exist yet.  When all three hold, a comment consisting of
    an excerpt of the source entry plus a link back to it is written.

    Returns one of the status strings "href not found",
    "target page has already been pinged", "ping successful" or
    "target page not eligable for pings".
    """
    from post import sanitize, writeComment, existingBacklink

    # NOTE(review): source is supplied by the remote caller and opened
    # as-is; urllib.urlopen also accepts local paths and file: URLs, so
    # consider restricting it to http/https.
    page = parser()
    remote = urllib.urlopen(source.split('#')[0])
    try:
        page.feed(remote.read())
    finally:
        # Release the connection even if reading or parsing fails.
        remote.close()

    if target not in page.hrefs:
        return "href not found"

    entries = PINGABLE.findall(target)
    if not entries:
        return "target page not eligable for pings"
    # Only the first match is ever acted on.
    entry = entries[0]

    if existingBacklink(entry, source):
        return "target page has already been pinged"

    if page.title == "":
        page.title = source

    # Only the excerpt text is used; the comment title comes from the page.
    (_, body) = excerpt(source)

    # NOTE(review): the markup in the literals below ('&amp;', '<br />' and
    # the anchor) is reconstructed.  The text as received had it stripped,
    # leaving a no-op replace('&', '&'), an unterminated string and a format
    # with one %s for two values.  Confirm against the deployed copy.
    title = escape(page.title.strip().replace('&amp;', '&'))
    if body:
        body = sanitize(body) + " ...<br />"
    # The URL lands inside a double-quoted attribute, so quotes are escaped
    # as well as &, < and >.
    source = escape(source.replace('&amp;', '&'), {'"': '&quot;'})
    body += 'Pingback from <a href="%s">%s</a>' % (source, title)

    writeComment(entry, title, body)
    return "ping successful"
def pingback():
    """CGI entry point: answer one XML-RPC request read from stdin.

    Reads ``CONTENT_LENGTH`` bytes from standard input and decodes them as
    an XML-RPC call.  A ``pingback.ping`` call is dispatched to ``ping``
    and its status string becomes the method response.  Any other method,
    or any error along the way, produces an XML-RPC fault (code 1) whose
    text is the traceback; the same traceback is passed to
    ``nonce.generate``.

    The response is written to standard output after a
    ``Content-type: text/xml`` header.  An empty request body produces no
    output at all.
    """
    import os
    import xmlrpclib

    try:
        data = sys.stdin.read(int(os.environ["CONTENT_LENGTH"]))
        if not data:
            return
        params, method = xmlrpclib.loads(data)
        if method != 'pingback.ping':
            raise NameError(method)
        message = ping(*params)
        response = xmlrpclib.dumps((message,), methodresponse=1)
    except Exception:
        # Top-level boundary: report every ordinary failure to the caller,
        # but let SystemExit and KeyboardInterrupt propagate.
        import traceback
        import nonce
        import time
        tb = ''.join(traceback.format_exception(*sys.exc_info()))
        # 86400 seconds = 24 hours from now; presumably an expiry time for
        # the stored traceback -- confirm against nonce.generate.
        nonce.generate(time.time() + 86400, tb)
        # NOTE(review): the full traceback is sent back to the remote
        # caller; consider returning a shorter message.
        response = xmlrpclib.dumps(xmlrpclib.Fault(1, tb))

    out = sys.stdout
    out.write('Content-type: text/xml\n')
    out.write('\n')
    out.write(response)
    out.write('\n')