From 802668818e930d91c37f8107e4c8822bd3ebdfac Mon Sep 17 00:00:00 2001
From: zyp
Date: Wed, 25 Oct 2006 16:14:43 +0000
Subject: [project @ zyp-20061025161443-953b0f9ed85da86e]

[project @ 64]
Implemented caching.
---
 anidb.py        | 19 +++++++++++++------
 pyanidb/hash.py | 48 ++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/anidb.py b/anidb.py
index a4ce618..3645905 100644
--- a/anidb.py
+++ b/anidb.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import pyanidb, pyanidb.hash
-import ConfigParser, optparse, os, sys, getpass, multihash
+import ConfigParser, optparse, os, sys, getpass
 
 # Colors.
 
@@ -34,6 +34,8 @@ op.add_option('-r', '--recursive', help = 'Recurse into directories.',
 	action = 'store_true', dest = 'recursive', default = False)
 op.add_option('-s', '--suffix', help = 'File suffix for recursive matching.',
 	action = 'append', dest = 'suffix', default = config.get('suffix', '').split())
+op.add_option('-c', '--no-cache', help = 'Do not use cached values.',
+	action = 'store_false', dest = 'cache', default = int(config.get('cache', '1')))
 op.add_option('-i', '--identify', help = 'Identify files.',
 	action = 'store_true', dest = 'identify', default = False)
 
@@ -48,6 +50,12 @@ options, args = op.parse_args(sys.argv[1:])
 
 # Defaults.
 
+if options.cache:
+	try:
+		import xattr
+	except ImportError:
+		print red('No xattr, caching disabled.')
+		options.cache = False
 options.identify = options.identify or options.rename
 options.login = options.add or options.identify
 if not options.suffix:
@@ -103,10 +111,9 @@ if options.login:
 
 hashed = unknown = 0
 
-for filename, hash in pyanidb.hash.hash_files(files):
-	size = os.path.getsize(filename)
-	print blue('Hashed:'), 'ed2k://|file|%s|%d|%s|' % (filename, size, hash.ed2k())
-	fid = (size, hash.ed2k())
+for file in pyanidb.hash.hash_files(files, options.cache):
+	print blue('Hashed:'), 'ed2k://|file|%s|%d|%s|%s' % (file.name, file.size, file.ed2k, ' (cached)' if file.cached else '')
+	fid = (file.size, file.ed2k)
 	hashed += 1
 	
 	try:
@@ -137,7 +144,7 @@ for filename, hash in pyanidb.hash.hash_files(files):
 		s = s.replace('/', '_')
 		
 		print yellow('Renaming to:'), s
-		os.rename(filename, os.path.join(os.path.split(filename)[0], s))
+		os.rename(file.name, os.path.join(os.path.split(file.name)[0], s))
 
 # Adding.
 
diff --git a/pyanidb/hash.py b/pyanidb/hash.py
index 70f49af..d655d53 100644
--- a/pyanidb/hash.py
+++ b/pyanidb/hash.py
@@ -1,25 +1,61 @@
-import multihash, threading, time
+import multihash, threading, time, os
+try:
+	import xattr
+except ImportError:
+	xattr = None
+
+class File:
+	def __init__(self, name, algorithms, cache):
+		self.name = name
+		self.size = os.path.getsize(name)
+		self.mtime = os.path.getmtime(name)
+		self.cached = False
+		if cache:
+			self.read_cache()
+		if sum([not hasattr(self, a) for a in algorithms]):
+			self.cached = False
+			h = multihash.hash_file(name, algorithms)
+			for a in algorithms:
+				setattr(self, a, getattr(h, a)())
+			self.write_cache()
+	
+	def read_cache(self):
+		cache = dict([(n[13:], xattr.getxattr(self.name, n)) for n in xattr.listxattr(self.name) if n.startswith('user.pyanidb.')])
+		if 'mtime' not in cache or str(int(self.mtime)) != cache.pop('mtime'):
+			return
+		for n, v in cache.iteritems():
+			setattr(self, n, v)
+		self.cached = True
+	
+	def write_cache(self):
+		try:
+			xattr.setxattr(self.name, 'user.pyanidb.mtime', str(int(self.mtime)))
+			for n in ('ed2k', 'md5', 'sha1', 'crc32'):
+				if hasattr(self, n):
+					xattr.setxattr(self.name, 'user.pyanidb.' + n, getattr(self, n))
+		except IOError:
+			pass
 
 class Hashthread(threading.Thread):
-	def __init__(self, filelist, hashlist, algorithms, *args, **kwargs):
+	def __init__(self, filelist, hashlist, algorithms, cache, *args, **kwargs):
 		self.filelist = filelist
 		self.hashlist = hashlist
 		self.algorithms = algorithms
+		self.cache = cache
 		threading.Thread.__init__(self, *args, **kwargs)
 	def run(self):
 		try:
 			while 1:
 				f = self.filelist.pop(0)
-				h = multihash.hash_file(f, self.algorithms)
-				self.hashlist.append((f, h))
+				self.hashlist.append(File(f, self.algorithms, self.cache))
 		except IndexError:
 			return
 
-def hash_files(files, num_threads = 1, algorithms = ('ed2k',)):
+def hash_files(files, cache = False, num_threads = 1, algorithms = ('ed2k',)):
 	hashlist = []
 	threads = []
 	for x in xrange(num_threads):
-		thread = Hashthread(files, hashlist, algorithms)
+		thread = Hashthread(files, hashlist, algorithms, cache)
 		thread.start()
 		threads.append(thread)
 	while hashlist or sum([thread.isAlive() for thread in threads]):
-- 
cgit v1.2.3
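
For context, a minimal sketch (Python 2, matching the rest of pyanidb; not part of the patch) of how the cached values written by File.write_cache() could be read back with the same module-level xattr calls the patch uses. The helper name and file path are hypothetical; the mtime comparison mirrors File.read_cache(), where a mismatch means the file changed since hashing and the cached hashes are stale.

import os, xattr  # same third-party xattr module the patch imports

def read_pyanidb_cache(path):
	# Hypothetical helper: collect the user.pyanidb.* attributes set by write_cache().
	prefix = 'user.pyanidb.'
	cache = dict((n[len(prefix):], xattr.getxattr(path, n))
		for n in xattr.listxattr(path) if n.startswith(prefix))
	# Discard the cache when the stored mtime no longer matches the file on disk.
	if cache.pop('mtime', None) != str(int(os.path.getmtime(path))):
		return {}
	return cache

print read_pyanidb_cache('/path/to/episode.mkv')  # hypothetical path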