#!/usr/bin/python
"""Static-file cache for rendered blog pages.

Provides helpers to clear cached pages, save freshly rendered CGI output
into the cache, precompute feed deltas (RFC 3229) and serve a cached page.
All locations are built by string concatenation from ``directory.cache``,
``directory.data`` and ``directory.history``.

NOTE(review): the concatenations below presume those values end with a
trailing slash -- confirm against the ``config`` module.
"""
from config import directory
import os
import os.path
import time

#########################################################################
#                      Remove items from the cache                      #
#########################################################################

def clear(entry=None):
    """Remove cached pages, either site-wide or those affected by one entry.

    entry -- name of a post (without the ``.txt`` suffix) or None.

    Without an entry, the top-level ``index.*`` and ``all.*`` files plus the
    dated and ``archives/`` index pages for today's date are removed.

    With an entry, ``index.rss`` is left alone; ``comments.*``, the entry's
    own cached files and everything next to the entry's link are removed;
    ``atomize(entry).close()`` and ``search.decache(entry)`` are invoked;
    and the dated index pages are derived from the modification time of the
    entry's data file instead of the current time.

    Raises OSError if the entry's data file cannot be stat'ed.
    """
    from glob import glob

    files = glob(directory.cache + "index.*") + glob(directory.cache + "all.*")
    date = time.localtime()

    if entry:
        rss = directory.cache + "index.rss"
        if rss in files:
            files.remove(rss)
        files += glob(directory.cache + "comments.*")
        files += glob(directory.cache + entry + ".*")
        date = time.localtime(
            os.stat(directory.data + entry + ".txt").st_mtime)

        # everything cached in the same directory as the entry's permalink
        from entry import post
        link = directory.cache + post(entry).link()
        files += glob('/'.join(link.split('/')[:-1] + ['*']))

        from atomize import atomize
        atomize(entry).close()

        import search
        search.decache(entry)

    # index pages for "", "YYYY", "YYYY/MM" and "YYYY/MM/DD"
    date = time.strftime("%Y %m %d", date).split()
    files += [directory.cache + "/index.html5"]
    for depth in range(0, 4):
        path = "/".join(date[:depth])
        files += [directory.cache + path + "/index.html"]
        files += [directory.cache + path + "/index.atom"]
        if not entry:
            files += [directory.cache + "archives/" + path + "/index.html"]
            files += [directory.cache + "archives/" + path + "/index.atom"]

    for target in files:
        if not os.path.exists(target):
            continue
        if os.path.isdir(target):
            # A cached directory may not hold an index page; removing it
            # unconditionally used to abort the whole clear with OSError.
            index_page = target + "/index.html"
            if os.path.exists(index_page):
                os.remove(index_page)
            if not os.listdir(target):
                os.rmdir(target)
        else:
            os.remove(target)

#########################################################################
#                       Add an item to the cache                        #
#########################################################################

def save(path, data):
    """Store the body of a CGI response in the cache and return the response.

    path -- cache-relative location, as a string or a list of path segments;
            a leading '/' is ignored.
    data -- full response (headers, blank line, body); coerced with str().

    Nothing is written when the path is empty or names a directory (ends
    with '/'), when the response has no blank line separating headers from
    body, when its first header is ``Status:``, or when the cache file
    already exists.  Saving ``index.atom`` or ``comments.atom`` also
    triggers delta() for that feed.

    Always returns the stringified data so callers can emit it unchanged.
    """
    data = str(data)

    # Join list paths first: lists have no endswith(), so the directory
    # check below would otherwise raise AttributeError.
    if isinstance(path, list):
        path = "/".join(path)
    if not path or path.endswith('/'):
        return data
    if path[0] == '/':
        path = path[1:]

    mime_boundary = data.find('\n\n')
    if mime_boundary < 0:
        # no header/body separator: there is no body that can be cached
        return data

    header_words = data[:mime_boundary].split()
    if header_words and header_words[0].lower() == 'status:':
        # responses carrying an explicit Status: header are never cached
        return data

    # create any required directories
    subdir = path[:path.rfind('/') + 1]
    if subdir and not os.path.exists(directory.cache + subdir):
        segments = subdir.split('/')
        for depth in range(1, len(segments) + 1):
            parent = directory.cache + "/".join(segments[:depth])
            if not os.path.exists(parent):
                try:
                    os.mkdir(parent)
                except OSError:
                    pass  # timing issue: another process created it first
                os.chmod(parent, 0o777)

    # safety first
    target = directory.cache + path
    if os.path.exists(target):
        return data

    # save the data to the cache directory
    with open(target, "w") as cached:
        cached.write(data[mime_boundary + 2:])
    os.chmod(target, 0o666)

    if path == 'index.atom':
        delta("index")
    if path == 'comments.atom':
        delta("comments")

    return data

#########################################################################
#                Precompute deltas for RFC 3229 support                 #
#########################################################################

def delta(feed):
    """Precompute "226 IM Used" delta responses for a feed.

    feed -- feed name without extension; ``/blog/<feed>.atom`` is fetched
            from intertwingly.net.

    Existing ``<feed>*.asis`` files in ``directory.history`` are removed,
    copying each one's access time onto its matching ``.base`` file.  Then,
    for every ``<feed>*.base`` file whose name does not contain the current
    ETag, an ``.asis`` file is written holding the response headers and the
    current feed with every captured piece that also occurs in that base
    file blanked out.

    Returns None; does nothing when the response carries no ETag.
    """
    from glob import glob
    import httplib, re

    # NOTE(review): as written this pattern names no entry element, so it
    # does not obviously "divide feed into entries"; markup may have been
    # lost from the literal -- confirm against the deployed copy.
    pattern = re.compile(r"(\s*.*?)", re.DOTALL)

    # get current page
    connection = httplib.HTTPConnection("intertwingly.net")
    try:
        connection.request("GET", "/blog/%s.atom" % feed)
        response = connection.getresponse()
        headers = response.msg.items()
        etag = response.getheader('ETag')

        # etag is vital to this... remove W/ prefix
        if not etag:
            return
        quote = etag.find('"')
        if quote > 0:
            # only strip when a quote exists; slicing from find() == -1
            # would keep nothing but the last character
            etag = etag[quote:]

        # divide feed into entries
        latest = re.split(pattern, response.read())
    finally:
        connection.close()
    if not latest:
        return

    # remove existing "asis" files... updating the base files access times
    for stale in glob(directory.history + feed + '*.asis'):
        if not os.path.exists(stale):
            continue
        asis_stats = os.stat(stale)
        os.remove(stale)
        basefile = stale.replace('.asis', '.base')
        if os.path.exists(basefile):
            base_stats = os.stat(basefile)
            os.utime(basefile, (asis_stats.st_atime, base_stats.st_mtime))

    # for each basefile in history
    for basefile in glob(directory.history + feed + '*.base'):

        # don't compute delta on current file
        if etag in basefile:
            continue

        # parse and split basefile, retaining stats (reading it must not
        # disturb the access time recorded above)
        stats = os.stat(basefile)
        with open(basefile) as source:
            known = set(re.split(pattern, source.read()))
        os.utime(basefile, (stats.st_atime, stats.st_mtime))

        # asis = latest - base; odd indexes hold the captured pieces
        pieces = latest[:]
        for i in range(1, len(pieces), 2):
            if pieces[i] in known:
                pieces[i] = ''
        asis = ''.join(pieces)

        # write out asis file, with headers
        with open(basefile.replace('.base', '.asis'), 'w') as asisfile:
            asisfile.write("Status: 226 IM Used\r\n")
            for (name, value) in headers:
                lowered = name.lower()
                if lowered == 'etag':
                    value = etag
                if lowered == 'content-length':
                    value = str(len(asis))
                if lowered == 'vary':
                    value += ",If-None-Match"
                name = '-'.join(
                    [part.capitalize() for part in name.split('-')])
                asisfile.write("%s: %s\r\n" % (name, value))
            asisfile.write("IM: feed\r\n")
            asisfile.write("\r\n%s" % asis)

    # write out new base file
    # basefile=open('%s%s.%s.base' % (directory.history, feed, etag), 'w')
    # basefile.write(''.join(latest))
    # basefile.close()

#########################################################################
#                     Serve an item from the cache                      #
#########################################################################

def serve(path, query=None):
    """Fetch a page from www.intertwingly.net as a CGI-style response.

    path  -- location below ``/blog/``.
    query -- passed through to ``search.isStale``.

    Returns ``"Status: <code> <msg>"`` followed by the response headers, a
    blank line and the body; returns None when ``search.isStale`` reports
    the page stale, when the reply status is 400 or above, or when the
    reply could not be parsed.
    """
    import search
    if search.isStale(path, query):
        return None

    from httplib import HTTP
    h = HTTP('www.intertwingly.net')
    h.putrequest('GET', '/blog/' + path)
    h.putheader('Host', 'www.intertwingly.net')
    h.putheader('Accept-Encoding', os.environ.get('HTTP_ACCEPT_ENCODING', ''))
    h.putheader('Accept', os.environ.get('HTTP_ACCEPT', ''))
    h.endheaders()

    (code, msg, headers) = h.getreply()
    try:
        # getreply() answers (-1, line, None) on a malformed status line;
        # treat that like any other failure instead of crashing on
        # headers.headers below.
        if headers is None or code >= 400:
            return None
        result = "Status: %s %s\r\n%s\r\n" % (
            code, msg, ''.join(headers.headers))
        result += h.file.read()
        return result
    finally:
        # release the response on every path, including failures
        h.file.close()

#########################################################################
#             Main: clear specified entries from the cache              #
#########################################################################

if __name__ == '__main__':
    import sys
    entries = sys.argv[1:]
    if not entries:
        clear()
    else:
        for entry in entries:
            clear(entry)