import hashlib
import os
import threading
import time

try:
    import xattr
except ImportError:
    # Optional dependency: when it is missing the xattr-based hash cache is
    # simply disabled (see File.read_cache / File.write_cache).
    xattr = None


def _to_text(value):
    """Return *value* as a native string, decoding it if it is raw bytes.

    On Python 2 ``bytes`` is ``str``, so values pass through unchanged.
    """
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode('utf-8', 'replace')
    return value


def _to_bytes(value):
    """Return *value* encoded to bytes when it is a text string.

    Bytes (and objects without an ``encode`` method) are returned unchanged.
    """
    if isinstance(value, bytes) or not hasattr(value, 'encode'):
        return value
    return value.encode('utf-8')


class Hash:
    """Compute the requested hashes of a file in a single pass.

    For every name in *algorithms* an ``update_<name>`` method must exist on
    this class (an unknown name raises ``AttributeError``); the result is
    then available from the method called ``<name>``.
    """

    # Size in bytes of one ed2k chunk; each chunk is hashed with MD4.
    ED2K_CHUNK_SIZE = 9728000

    def __init__(self, filename, algorithms):
        """Read *filename* in 128 KiB blocks and feed every block to each
        selected ``update_*`` method."""
        update_list = [getattr(self, 'update_%s' % a) for a in algorithms]
        self.md4_partial = hashlib.new('md4')  # digest of the current chunk
        self.md4_final = hashlib.new('md4')    # digest over completed chunk digests
        self.size_total = 0
        # Binary mode: hashes must see the raw bytes.  The context manager
        # guarantees the handle is closed even if an update method raises.
        with open(filename, 'rb') as f:
            data = f.read(131072)
            while data:
                for update in update_list:
                    update(data)
                data = f.read(131072)

    def update_ed2k(self, data):
        """Feed *data* into the ed2k state, splitting it on chunk boundaries."""
        chunk_size = self.ED2K_CHUNK_SIZE
        pos = 0
        while pos < len(data):
            # A chunk was completed earlier and more data follows: fold its
            # digest into the final hash and start a fresh chunk digest.
            if self.size_total and not self.size_total % chunk_size:
                self.md4_final.update(self.md4_partial.digest())
                self.md4_partial = hashlib.new('md4')
            # Take no more than what is left of the current chunk.
            size = min(len(data) - pos,
                       chunk_size - self.size_total % chunk_size)
            self.md4_partial.update(data[pos:pos + size])
            pos += size
            self.size_total += size

    def ed2k(self):
        """Return the ed2k hash as a hex string.

        Input no larger than one chunk yields the plain MD4 of the data;
        larger input yields the MD4 of the concatenated chunk digests.
        """
        if self.size_total > self.ED2K_CHUNK_SIZE:
            # Work on a copy so that calling ed2k() repeatedly keeps
            # returning the same value instead of re-appending the last
            # chunk digest to the shared state.
            final = self.md4_final.copy()
            final.update(self.md4_partial.digest())
            return final.hexdigest()
        return self.md4_partial.hexdigest()


class File:
    """A file with its size, mtime and one attribute per requested hash.

    Attributes: ``name``, ``size``, ``mtime``, ``cached`` (True when every
    requested hash came from the xattr cache) plus one attribute named after
    each entry of *algorithms*.
    """

    def __init__(self, name, algorithms, cache):
        """Stat *name* and obtain the hashes listed in *algorithms*.

        When *cache* is true the xattr cache is consulted first.  If any
        requested hash is still missing, all of them are recomputed and
        write_cache() is called (regardless of the *cache* flag).
        """
        self.name = name
        self.size = os.path.getsize(name)
        self.mtime = os.path.getmtime(name)
        self.cached = False
        if cache:
            self.read_cache()
        if not all(hasattr(self, a) for a in algorithms):
            self.cached = False
            h = Hash(name, algorithms)
            for a in algorithms:
                setattr(self, a, getattr(h, a)())
            self.write_cache()

    def read_cache(self):
        """Load cached values from the ``user.pyanidb.*`` extended attributes.

        Nothing is loaded when xattr support is unavailable, the attributes
        cannot be read, or the stored mtime differs from the file's current
        mtime.  On success every cached entry becomes an attribute of this
        object and ``cached`` is set to True.
        """
        if not xattr:
            return
        prefix = 'user.pyanidb.'
        cache = {}
        try:
            for raw_name in xattr.listxattr(self.name):
                key = _to_text(raw_name)
                if key.startswith(prefix):
                    # Values may arrive as bytes; normalise them to text so
                    # the mtime comparison below works.
                    cache[key[len(prefix):]] = _to_text(
                        xattr.getxattr(self.name, raw_name))
        except IOError:
            # The cache is best-effort, mirroring write_cache().
            return
        if cache.pop('mtime', None) != str(int(self.mtime)):
            return
        for n, v in cache.items():
            setattr(self, n, v)
        self.cached = True

    def write_cache(self):
        """Store the mtime and any known hashes as extended attributes.

        Does nothing without xattr support; I/O errors are ignored.
        """
        if not xattr:
            return
        try:
            self.clear_cache()
            xattr.setxattr(self.name, 'user.pyanidb.mtime',
                           _to_bytes(str(int(self.mtime))))
            for n in ('ed2k', 'md5', 'sha1', 'crc32'):
                if hasattr(self, n):
                    xattr.setxattr(self.name, 'user.pyanidb.' + n,
                                   _to_bytes(getattr(self, n)))
        except IOError:
            # Deliberately best-effort: failing to write the cache must not
            # abort hashing.
            pass

    def clear_cache(self):
        """Remove every ``user.pyanidb.*`` extended attribute from the file."""
        if not xattr:
            return
        for name in xattr.listxattr(self.name):
            if _to_text(name).startswith('user.pyanidb.'):
                xattr.removexattr(self.name, name)


class Hashthread(threading.Thread):
    """Worker thread: takes file names from a shared list, hashes them and
    appends the resulting File objects to a shared result list."""

    def __init__(self, filelist, hashlist, algorithms, cache, *args, **kwargs):
        """Remember the shared lists and options; extra arguments are passed
        on to threading.Thread."""
        self.filelist = filelist
        self.hashlist = hashlist
        self.algorithms = algorithms
        self.cache = cache
        threading.Thread.__init__(self, *args, **kwargs)

    def run(self):
        """Process file names until the shared input list is empty."""
        while True:
            # Only the pop() is guarded: an IndexError raised while hashing
            # must not be mistaken for "no more work".
            try:
                f = self.filelist.pop(0)
            except IndexError:
                return
            self.hashlist.append(File(f, self.algorithms, self.cache))


def hash_files(files, cache=False, algorithms=('ed2k',), num_threads=1):
    """Hash *files* with *num_threads* worker threads, yielding File objects.

    Results are yielded as they become available.  The *files* list is
    consumed in place by the workers.  *cache* and *algorithms* are passed
    on to File.
    """
    hashlist = []
    threads = []
    for _ in range(num_threads):
        thread = Hashthread(files, hashlist, algorithms, cache)
        thread.start()
        threads.append(thread)
    while True:
        # Sample liveness BEFORE looking at the results: if every worker had
        # already finished at this point, hashlist is final, so an empty
        # list really means we are done and no late result can be lost.
        alive = any(thread.is_alive() for thread in threads)
        if hashlist:
            yield hashlist.pop(0)
        elif alive:
            time.sleep(0.1)
        else:
            # Plain return ends the generator; raising StopIteration here
            # would become RuntimeError under PEP 479.
            return