import config, os from config import directory from glob import glob SyntaxError = "Syntax Error" ######################################################################### # Issue a search, from the cache, if possible ######################################################################### def search(query, entry="*", index=config.index): # check the cache if entry=="*": import md5, os.path cache=directory.querycache + md5.new(query).hexdigest() + ".list" if not os.path.exists(directory.querycache): os.mkdir(directory.querycache) if os.path.exists(cache): return file(cache).read().split('\n') # issue the search if query[:1] == '/': result=xpath(query, entry) else: import swish_e result=swish_e.search(index, query) # extract the entry names from the file paths result=map(lambda x:x.split('/')[-1].split('.')[0], result) # write the cache if entry=="*": flist=open(cache,'w') flist.write('\n'.join(result)) flist.close() fquery=open('.'.join(cache.split('.')[:-1])+".query",'w') fquery.write(query) fquery.close() # return the results return result ######################################################################### # Determine if a given file is stale relative to a query ######################################################################### def isStale(file,query): import os,md5 if not query: return False file=os.path.join(directory.cache,file) cache=directory.querycache + md5.new(query).hexdigest() + ".query" try: return (os.stat(cache).st_mtime > os.stat(file).st_mtime) except OSError: return True ######################################################################### # Index a directory ######################################################################### def index(sources=directory.atom, target=config.index): from commands import getoutput push = os.getcwd() try: if sources.endswith('/'): os.chdir(sources) sources = '*.atom' return getoutput("%s -i %s -c %sswish_e.config -f %s" % (config.indexer, sources, directory.codebase, target)) finally: os.chdir(push) ######################################################################### # Implement Xpath based searches ######################################################################### def xpath(query, entry="*"): import libxml2 # test the xpath expression for validity msg=[] libxml2.registerErrorHandler(lambda msg,str: msg.append(str), msg) try: doc = libxml2.parseDoc("") ctxt = doc.xpathNewContext() ctxt.xpathRegisterNs("xhtml","http://www.w3.org/1999/xhtml") ctxt.xpathRegisterNs("atom","http://purl.org/atom/ns#") ctxt.xpathRegisterNs("foaf","http://xmlns.com/foaf/0.1/") ctxt.xpathEval(query) doc.freeDoc() except libxml2.xpathError,e: if not msg: msg=str(e) raise SyntaxError, ''.join(msg) + ": " + query # now that the query checks out, go ahead and execute the query result=[] libxml2.registerErrorHandler(lambda ctx,str: None, None) for atom in glob(directory.atom+entry+'.atom'): doc = libxml2.parseFile(atom) ctxt = doc.xpathNewContext() ctxt.xpathRegisterNs("xhtml","http://www.w3.org/1999/xhtml") ctxt.xpathRegisterNs("atom","http://purl.org/atom/ns#") ctxt.xpathRegisterNs("foaf","http://xmlns.com/foaf/0.1/") if ctxt.xpathEval(query): result.append(atom) doc.freeDoc() return result ######################################################################### # Remove cached queries based on updated entries # ######################################################################### def decache(entry): import os entryindex=str(directory.atom+entry+".index") index(directory.atom+entry+".atom", entryindex) for query in glob(directory.querycache+'*.query')[:]: list='.'.join(query.split('.')[:-1])+'.list' if not os.path.exists(list): os.remove(query) else: stats=[os.stat(query),os.stat(list)] match = (entry in (search(open(query).read(),entry,entryindex))) if match <> (entry in open(list).read().split('\n')): os.remove(query) os.remove(list) else: os.utime(query,(stats[0].st_atime,stats[0].st_mtime)) os.utime(list,(stats[1].st_atime,stats[1].st_mtime)) for file in glob(entryindex+"*")[:]: os.remove(file) ######################################################################### # Main: test a specific query # ######################################################################### if __name__ == "__main__": import sys,urllib if sys.argv[1:]==['-reindex']: print index() else: print search(urllib.unquote(' '.join(sys.argv[1:])))