ó
ª	êKc           @   se  d  Z  d d l Z d d l Z d d l Z d d l m Z d d l m Z m Z d d l	 m
 Z
 d d l m Z d d l Z d d l Z y d d l m Z Wn d d l m Z n Xe j d	 e j ƒ Z d
 „  Z d „  Z d „  Z e j d e j ƒ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z  d „  Z! d „  Z" d d „ Z$ d S(   sÌ  
Reconstitute an entry document from the output of the Universal Feed Parser.

The main entry point is called 'reconstitute'.  Input parameters are:

  results: this is the entire hash table return by the UFP
  entry:   this is the entry in the hash that you want reconstituted

The value returned is an XML DOM.  Every effort is made to convert
everything to unicode, and text fields into either plain text or
well formed XHTML.

Todo:
  * extension elements
iÿÿÿÿN(   t   escape(   t   minidomt   Node(   t   html5parser(   t   dom(   t   md5(   t   news
   [--]c         C   s™   | s
 d St  | t ƒ rH y | j d ƒ } WqH | j d ƒ } qH Xn  t j t | ƒ } |  j } | j | ƒ } | j | j	 | ƒ ƒ |  j | ƒ | S(   sC    utility function to create a child element with the specified textNs   utf-8s
   iso-8859-1(
   t
   isinstancet   strt   decodet   illegal_xml_charst   subt
   invalidatet   ownerDocumentt   createElementt   appendChildt   createTextNode(   t   parentt   namet   valuet   xdoct   xelement(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyt   createTextElement   s     	c         C   s)   d d t  t |  j d ƒ ƒ ƒ d d S(   s    replace invalid characters u   <abbr title="U+%s">ï¿½</abbr>t   000i    i   iüÿÿÿ(   t   hext   ordt   group(   t   c(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyR   -   s    c         C   sP   |  j  d ƒ }  |  j d ƒ r: t t |  d d ƒ ƒ }  n t t |  ƒ ƒ }  |  S(   s4    convert numeric character references to characters i   t   xi   (   R   t
   startswitht   unichrt   int(   R   (    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyt   ncr2c2   s
    s   \W+c         C   s]   y. t  j d |  j d ƒ ƒ j ƒ  j d ƒ }  Wn t  j d |  ƒ j ƒ  }  n X|  j d ƒ S(   s    generate a css id from a name t   -s   utf-8(   t   nonalphaR   R	   t   lowert   encodet   strip(   R   (    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyt   cssid<   s
    .c      	   C   s.  | j  d ƒ r$ | j r$ | j } ní | j  d ƒ rH | j rH | j } nÉ | j  d ƒ r† | j r† | j j d t | j ƒ j ƒ  } n‹ | j  d ƒ rÄ | j rÄ | j	 j d t | j ƒ j ƒ  } nM | j  d ƒ r| j
 r| j
 d j d t | j
 d j ƒ j ƒ  } n d S|  r*t |  d | ƒ n  | S(	   s%    copy or compute an id for the entry t   idt   linkt   titlet   /t   summaryt   contenti    N(   t   has_keyR'   R(   R)   t   title_detailt   baseR   t	   hexdigestR+   t   summary_detailR,   R   R   (   t   xentryt   entryt   entry_id(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyR'   D   s"      c         C   sC  | j  d ƒ sP g  | d <| j  d ƒ rP | d j i d d 6| j d 6ƒ qP n  |  j } xã | d D]× } d | j ƒ  k r‚ qd n  | j d ƒ } | j d | j d ƒ ƒ | j  d ƒ rÕ | j d | j d ƒ ƒ n  | j  d ƒ r| j d | j d d ƒ ƒ n  | j  d ƒ r.| j d | j d ƒ ƒ n  |  j	 | ƒ qd Wd S(	   s    copy links to the entry t   linksR(   t	   alternatet   relt   hreft   typet   lengthN(
   R-   t   appendR(   R   t   keysR   t   setAttributet   gett   NoneR   (   R2   R3   R   R(   t   xlink(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyR5   [   s"    
(	 c         C   sc   | s
 d St  j d | ƒ } t |  | | ƒ } t  j t j ƒ  | ƒ } | j d | j d ƒ ƒ d S(   s/    insert a date-formated element into the entry Ns   %Y-%m-%dT%H:%M:%SZs   planet:formats   utf-8(   t   timet   strftimeR   t   configt   date_formatR=   R	   (   R2   R   t   parsedt	   formattedt   xdate(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyt   daten   s     c         C   sÂ   |  j  j d ƒ } | j d ƒ s, | j r0 d  S| j d | j d ƒ ƒ | j d ƒ r} | j r} | j d | j d ƒ ƒ n  | j d ƒ r± | j r± | j d | j d ƒ ƒ n  |  j | ƒ d  S(   Nt   categoryt   termt   schemet   label(	   R   R   R-   RJ   R=   R>   RK   RL   R   (   R2   t   tagt   xtag(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyRI   v   s     c         C   s¯   | s
 d S|  j  } | j | ƒ } | j d d ƒ rP t | d | j d ƒ ƒ n | j | j d ƒ ƒ t | d | j d d ƒ ƒ t | d | j d d ƒ ƒ |  j | ƒ d S(   s.    insert an author-like element into the entry NR   t   emailt   uriR8   (   R   R   R>   R?   R   R   (   R2   R   t   detailR   t   xauthor(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyt   author€   s     	c         C   sö  | s | j  r d Sd } d } |  j } | j | ƒ } t | j  t ƒ rc | j  j d ƒ | _  n  | j d ƒ s‘ | j j	 ƒ  j
 d ƒ d k  r± t | j  ƒ | d <d | d <n  | j j
 d	 ƒ d k r| ry- t j | | j  ƒ j } | j d d	 ƒ Wqd
 } qXn  | j j
 d	 ƒ d k  s.| rªt j d t j ƒ } | j | | j  d d ƒ}	 xH|	 j j D]7}
 |
 j t j k rŠqln  |
 j d k rŸqln  x|
 j D]ö } | j t j k rÇq©n  | j d k rÜq©n  yŽ | j ƒ  t | j ƒ d
 k rR| j j t j k rR| j } t j | j  ƒ rh| j! t j" t# | j  ƒ ƒ } qhn | } | j d d	 ƒ PWq©| j d d ƒ | j! | j  j$ d ƒ ƒ } q©Xq©WqlWn  | rÀ| j% | ƒ n  | j& d ƒ rå| j d | j' ƒ n  |  j% | ƒ d S(   s.    insert a content-like element into the entry Ns2   <div xmlns="http://www.w3.org/1999/xhtml">%s</div>s   utf-8R9   t   htmli    R   s	   text/htmlt   xhtmli   t   treet   encodingt   bodyt   divt   languages   xml:lang((   R   R?   R   R   R   t   unicodeR$   R-   R9   R#   t   findR    R   t   parseStringt   documentElementR=   R   t
   HTMLParserR   t   TreeBuildert   parset
   childNodest   nodeTypeR   t   ELEMENT_NODEt   nodeNamet	   normalizet   lent
   firstChildt	   TEXT_NODER
   t   searcht   dataR   R   R   R	   R   R>   RZ   (   R2   R   RQ   t   bozoRk   t   xdivR   t   xcontentt   parserRT   RX   RY   (    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyR,      sb     	.    
	* c         C   s„   | s | r d St  |  d d	 d | ƒ } | j d
 d ƒ t  |  d d d | ƒ } | j d d ƒ |  j | ƒ |  j | ƒ d S(   s$    insert geo location into the entry Ns   %s:%st   geot   lats   %fs   xmlns:%ss(   http://www.w3.org/2003/01/geo/wgs84_pos#t   long(   s   geos   lats	   xmlns:geo(   s   geos   longs	   xmlns:geo(   R   R=   R   (   R2   Rr   Rq   t   xlatt   xlong(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyt   locationÊ   s     c         C   s  |  j  } t |  d | j d | j d d ƒ ƒ ƒ t |  d | j d d ƒ ƒ t |  d | j d d ƒ ƒ | j d ƒ rª | j d ƒ rª t |  d | j j d d ƒ ƒ n  x' | j d g  ƒ D] } t |  | ƒ q½ Wt |  d | j d	 i  ƒ ƒ x* | j d
 g  ƒ D] } t |  d | ƒ qWt |  | ƒ | j rÈ| j d ƒ rÈ| j	 d ƒ } | j
 d | j d ƒ ƒ |  j | ƒ | j d ƒ rÈ| j	 d ƒ } | j | j | j d ƒ ƒ ƒ |  j | ƒ qÈn  t |  d | j d d ƒ | ƒ t |  d | j d d ƒ | ƒ t |  d | j d d ƒ | ƒ t |  d | j d t j ƒ  ƒ ƒ | rZ| | d <n  | d k s| rrd pud | d <n  | j d ƒ rµ| j d ƒ rµt | d ƒ | d <n  xK | j ƒ  D]= \ }	 }
 |	 j d ƒ rÂt |  |	 j d d d ƒ |
 ƒ qÂqÂWd S(   s&    copy source information to the entry R'   R(   t   icont   logot   imageR8   t   tagsRS   t   author_detailt   contributorst   contributorR)   t   rightst   rights_detailt   subtitlet   subtitle_detailR.   t   updatedt   updated_parsedt   planet_formatt   truet   falset   planet_bozot   planet_names   planet_css-idt   planet_t   _t   :i   N(   R   R   R>   R?   R-   Rx   RI   RS   R5   R   R=   R   R   R,   RH   RA   t   gmtimeR&   t   itemsR   t   replace(   t   xsourcet   sourceRl   t   formatR   RM   R|   R@   t   xtitlet   keyR   (    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyR   Ö   sB    	(""  c         C   sc  t  j d ƒ } | j } | j d t j ƒ | j d ƒ rP | j d | j ƒ n+ |  j j d ƒ r{ | j d |  j j ƒ n  t	 | | ƒ t
 | | ƒ |  j } | j d ƒ s¸ | j rÑ | j | j d ƒ ƒ n  t | d | j d d* ƒ | ƒ t | d | j d d* ƒ | ƒ t | d	 | j d	 d* g ƒ d
 | ƒ t | d | j d d* ƒ | ƒ t | d t |  j | t j ƒ  ƒ ƒ t | d | j d d* ƒ ƒ x' | j d g  ƒ D] } t | | ƒ q¨Wx– d+ g D]‹ \ } } | j d | | j ƒ  f ƒ rÉ|  j j | ƒ rÉt | d | | f | d | | j ƒ  f ƒ } | j d | |  j | ƒ qÉqÉW| j d ƒ rD| j d g  ƒ j d ƒ rD| j d g  ƒ j d ƒ rD| j d g  ƒ }	 |	 j d d* ƒ }
 |	 j d d* ƒ } |
 d k rút | | d
 | d ƒ qD|
 d k s|
 d k s|
 d k rDt | | d
 d
 | d
 d ƒ qDn  | j d ƒ r™| j d ƒ r™t | t | j d d* ƒ ƒ t | j d d* ƒ ƒ ƒ n  | j d  i  ƒ } | rç| j d! ƒ rç|  j j d" ƒ rç|  j d" | d! <n  t | d# | ƒ x* | j d$ g  ƒ D] } t | d% | ƒ q
W| j d& ƒ } | rx9 |  j j ƒ  D]( \ } } | j d' ƒ rI| | | <qIqIW|  j j d( ƒ r¦|  j j	 | d) <q¦n	 |  j } | j d  i  ƒ } | sÏ| j d! ƒ r-| j d! ƒ r-|  j j d" ƒ r-| r| j | j  ƒ  ƒ } n  | | d  <|  j d" | d! <n  | j d& ƒ } t! | | | |  j" ƒ | j | ƒ | S(,   s-    create an entry document from a parsed feed s-   <entry xmlns="http://www.w3.org/2005/Atom"/>
s   xmlns:planetRZ   s   xml:langR)   R.   R+   R1   R,   i    R}   R~   R   t	   publishedt   published_parsedRy   t
   feedburnert   origLinks   %s_%ss   %s:%ss   xmlns:%st   whereR9   t   coordinatest   Pointi   t   Boxt
   LineStringt   Polygont   geo_latt   geo_longRz   R   R‡   RS   R{   R|   R   Rˆ   R'   t	   planet_idN(   s
   feedburnerR–   (#   R   R]   R^   R=   t   planett   xmlnsR-   RZ   t   feedR'   R5   Rl   R)   R   R   R,   R>   R?   RH   t   entry_updatedRA   R‹   RI   R#   t
   namespacesR   Ru   t   floatRS   RŒ   R   t	   __class__t   copyR   t   version(   R¢   R3   R   R2   Rl   RM   t   nsR   t	   xoriglinkR—   R9   R˜   Rz   R|   t   srcR   t
   src_authorRŽ   (    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyt   reconstitute  s‚    		&%"$&7 	" 
c         C   s]   | d f | d f |  d f f } x5 | D]- \ } } | j  | ƒ r( | | r( | | Sq( W| S(   NR‚   R”   (   R-   (   R¢   R3   t   defaultt   chkst   nodet   field(    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyR£   X  s    		(%   t   __doc__t   reRA   t   sgmllibt   xml.sax.saxutilsR    t   xml.domR   R   t   html5libR   t   html5lib.treebuildersR   R    RC   t   hashlibR   R   t   compilet   UNICODER
   R   R   R    R"   R&   R'   R5   RH   RI   RS   R,   Ru   R   R­   R?   R£   (    (    (    sD   /home/sa3ruby/intertwingly.net/code/venus-bzr/planet/reconstitute.pyt   <module>   s4   $									
		:		,	V