summary refs log tree commit diff
diff options
context:
space:
mode:
author zyp <zyp@localhost> 2006-10-25 18:14:43 +0200
committer zyp <zyp@localhost> 2006-10-25 18:14:43 +0200
commit 802668818e930d91c37f8107e4c8822bd3ebdfac (patch)
tree 0806c5e4aff2f69e442b687785ab059fc4130069
parent 077ce424d11c8344dc4d95bc5b5fa88d67adf643 (diff)
[project @ zyp-20061025161443-953b0f9ed85da86e]
[project @ 64] Implemented caching.
-rw-r--r-- anidb.py 19
-rw-r--r-- pyanidb/hash.py 48
2 files changed, 55 insertions, 12 deletions
diff --git a/anidb.py b/anidb.py
index a4ce618..3645905 100644
--- a/anidb.py
+++ b/anidb.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import pyanidb, pyanidb.hash
-import ConfigParser, optparse, os, sys, getpass, multihash
+import ConfigParser, optparse, os, sys, getpass
# Colors.
@@ -34,6 +34,8 @@ op.add_option('-r', '--recursive', help = 'Recurse into directories.',
action = 'store_true', dest = 'recursive', default = False)
op.add_option('-s', '--suffix', help = 'File suffix for recursive matching.',
action = 'append', dest = 'suffix', default = config.get('suffix', '').split())
+op.add_option('-c', '--no-cache', help = 'Do not use cached values.',
+ action = 'store_false', dest = 'cache', default = int(config.get('cache', '1')))
op.add_option('-i', '--identify', help = 'Identify files.',
action = 'store_true', dest = 'identify', default = False)
@@ -48,6 +50,12 @@ options, args = op.parse_args(sys.argv[1:])
# Defaults.
+if options.cache:
+ try:
+ import xattr
+ except ImportError:
+ print red('No xattr, caching disabled.')
+ options.cache = False
options.identify = options.identify or options.rename
options.login = options.add or options.identify
if not options.suffix:
@@ -103,10 +111,9 @@ if options.login:
hashed = unknown = 0
-for filename, hash in pyanidb.hash.hash_files(files):
- size = os.path.getsize(filename)
- print blue('Hashed:'), 'ed2k://|file|%s|%d|%s|' % (filename, size, hash.ed2k())
- fid = (size, hash.ed2k())
+for file in pyanidb.hash.hash_files(files, options.cache):
+ print blue('Hashed:'), 'ed2k://|file|%s|%d|%s|%s' % (file.name, file.size, file.ed2k, ' (cached)' if file.cached else '')
+ fid = (file.size, file.ed2k)
hashed += 1
try:
@@ -137,7 +144,7 @@ for filename, hash in pyanidb.hash.hash_files(files):
s = s.replace('/', '_')
print yellow('Renaming to:'), s
- os.rename(filename, os.path.join(os.path.split(filename)[0], s))
+ os.rename(file.name, os.path.join(os.path.split(file.name)[0], s))
# Adding.
diff --git a/pyanidb/hash.py b/pyanidb/hash.py
index 70f49af..d655d53 100644
--- a/pyanidb/hash.py
+++ b/pyanidb/hash.py
@@ -1,25 +1,61 @@
-import multihash, threading, time
+import multihash, threading, time, os
+try:
+ import xattr
+except ImportError:
+ xattr = None
+
+class File:
+ def __init__(self, name, algorithms, cache):
+ self.name = name
+ self.size = os.path.getsize(name)
+ self.mtime = os.path.getmtime(name)
+ self.cached = False
+ if cache:
+ self.read_cache()
+ if sum([not hasattr(self, a) for a in algorithms]):
+ self.cached = False
+ h = multihash.hash_file(name, algorithms)
+ for a in algorithms:
+ setattr(self, a, getattr(h, a)())
+ self.write_cache()
+
+ def read_cache(self):
+ cache = dict([(n[13:], xattr.getxattr(self.name, n)) for n in xattr.listxattr(self.name) if n.startswith('user.pyanidb.')])
+ if 'mtime' not in cache or str(int(self.mtime)) != cache.pop('mtime'):
+ return
+ for n, v in cache.iteritems():
+ setattr(self, n, v)
+ self.cached = True
+
+ def write_cache(self):
+ try:
+ xattr.setxattr(self.name, 'user.pyanidb.mtime', str(int(self.mtime)))
+ for n in ('ed2k', 'md5', 'sha1', 'crc32'):
+ if hasattr(self, n):
+ xattr.setxattr(self.name, 'user.pyanidb.' + n, getattr(self, n))
+ except IOError:
+ pass
class Hashthread(threading.Thread):
- def __init__(self, filelist, hashlist, algorithms, *args, **kwargs):
+ def __init__(self, filelist, hashlist, algorithms, cache, *args, **kwargs):
self.filelist = filelist
self.hashlist = hashlist
self.algorithms = algorithms
+ self.cache = cache
threading.Thread.__init__(self, *args, **kwargs)
def run(self):
try:
while 1:
f = self.filelist.pop(0)
- h = multihash.hash_file(f, self.algorithms)
- self.hashlist.append((f, h))
+ self.hashlist.append(File(f, self.algorithms, self.cache))
except IndexError:
return
-def hash_files(files, num_threads = 1, algorithms = ('ed2k',)):
+def hash_files(files, cache = False, num_threads = 1, algorithms = ('ed2k',)):
hashlist = []
threads = []
for x in xrange(num_threads):
- thread = Hashthread(files, hashlist, algorithms)
+ thread = Hashthread(files, hashlist, algorithms, cache)
thread.start()
threads.append(thread)
while hashlist or sum([thread.isAlive() for thread in threads]):