import os, time, urllib from xml.dom import minidom, Node from config import channel, directory from post import sanitize, writeComment from rfc822 import parseaddr rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" soapNS = "http://schemas.xmlsoap.org/soap/envelope/" wfwNS = "http://wellformedweb.org/CommentAPI/" xhtmlNS = "http://www.w3.org/1999/xhtml" knownNamespaces = { None: '', "http://purl.org/rss/1.0/": "", "http://purl.org/dc/elements/1.1/": "dc_", "http://purl.org/rss/1.0/modules/content/": "content_", } understoodHeaders={wfwNS: ['preview']} def verifyMustUnderstand(nodes): for node in nodes: if node.nodeType <> Node.ELEMENT_NODE: continue verifyMustUnderstand(node.childNodes) if node.getAttributeNS(soapNS,'mustUnderstand') in ['','0']: continue if node.namespaceURI in understoodHeaders.keys(): if node.localName in understoodHeaders[node.namespaceURI]: continue raise "Required header %s not understood" % str(node.nodeName) def soap(entry, file): try: dom=minidom.parse(file) verifyMustUnderstand(dom.getElementsByTagNameNS(soapNS,'Header')) # find the item item=dom.getElementsByTagNameNS('http://purl.org/rss/1.0/','item') item=item or dom.getElementsByTagName('item') item[0].normalize() # Extended dictionay: defaults for all values, and accessible as properties class DictionaryWithDefaults(dict): def __getattr__(self, attr): return self.get(attr,'') def __setattr__(self, attr, value): self[attr]=value def __getitem__(self, item): return self.get(item,'') # Grab the text elements comment = DictionaryWithDefaults() for node in item[0].childNodes: if node.nodeType <> Node.ELEMENT_NODE: continue if node.namespaceURI == xhtmlNS: value=node.toxml() value=value[value.find('>')+1:value.rfind('<')] comment['xhtml_' + node.localName]=value else: if len(node.childNodes) <> 1: continue if node.firstChild.nodeType <> Node.TEXT_NODE: continue if not node.namespaceURI in knownNamespaces: continue prefix=knownNamespaces[node.namespaceURI] comment[prefix + node.localName]=node.firstChild.nodeValue # determine name name = comment.dc_creator or comment.author if name.find('@')>0: (name, comment.email)=parseaddr(name) if not name and comment.link: try: from pingback import parser file=urllib.urlopen(comment.link.split('#')[0]) page = parser() page.feed(file.read()) file.close() name = page.title except: pass if not name: name="anonymous" # determine link link =comment.link if not link: link= item[0].getAttributeNS(rdfNS,"about") if not link and comment.email: link = "mailto:" + comment.email if link: name = '%s' % (link, name) # determine description body = sanitize(comment.xhtml_body or comment.content_encoded or comment.description).strip() if not body: from pingback import excerpt (comment.title,body)=excerpt(comment.link) if body and not body.endswith('
'): body += '\n