summaryrefslogtreecommitdiff
path: root/pyanidb/hash.py
blob: 4aab45fdcdb0147cb2c5436c2971e9e60cabf9ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import multihash, threading, time, os
try:
	import xattr
except ImportError:
	xattr = None

class File:
	"""A file on disk together with its size, mtime and requested hashes.

	After construction the instance has ``name``, ``size``, ``mtime`` and
	``cached`` attributes plus one attribute per requested algorithm, named
	after the algorithm. ``cached`` is True only when every requested hash
	came from the extended-attribute cache and nothing had to be recomputed.
	"""

	# Namespace prefix of the extended attributes used as the hash cache.
	_XATTR_PREFIX = 'user.pyanidb.'

	def __init__(self, name, algorithms, cache):
		"""Stat *name* and make every hash in *algorithms* available.

		name       -- path of the file to hash.
		algorithms -- re-iterable collection of hash names; each one ends
		              up as an attribute of the same name on this object.
		cache      -- when true, try the xattr cache before hashing.

		If any requested hash is still missing after the optional cache
		lookup, all of them are computed with ``multihash.hash_file`` and
		the result is written back to the cache (regardless of *cache*).
		"""
		self.name = name
		self.size = os.path.getsize(name)
		self.mtime = os.path.getmtime(name)
		self.cached = False
		if cache:
			self.read_cache()
		if not all(hasattr(self, a) for a in algorithms):
			# read_cache() may have flagged a hit that only covers part of
			# what was asked for; that does not count as cached.
			self.cached = False
			h = multihash.hash_file(name, algorithms)
			for a in algorithms:
				setattr(self, a, getattr(h, a)())
			self.write_cache()

	def read_cache(self):
		"""Load cached hashes from the file's extended attributes.

		Does nothing when the xattr module is unavailable, when the
		attributes cannot be read, or when the stored mtime is missing or
		differs from the file's current mtime. On success every cached
		value becomes an instance attribute and ``cached`` is set to True.
		"""
		if not xattr:
			return
		prefix = self._XATTR_PREFIX
		try:
			cache = dict(
				(n[len(prefix):], xattr.getxattr(self.name, n))
				for n in xattr.listxattr(self.name)
				if n.startswith(prefix))
		except IOError:
			# The cache is best-effort (same policy as write_cache): an
			# unreadable cache simply means the hashes get recomputed.
			return
		# A missing or stale mtime stamp invalidates the whole entry.
		if cache.pop('mtime', None) != str(int(self.mtime)):
			return
		# NOTE(review): every remaining user.pyanidb.* attribute is copied
		# onto the instance under its own name, whatever that name is.
		for n, v in cache.items():
			setattr(self, n, v)
		self.cached = True

	def write_cache(self):
		"""Replace the xattr cache with the current mtime and hashes.

		Only the hashes 'ed2k', 'md5', 'sha1' and 'crc32' are stored, and
		only those present on this instance. Does nothing without the
		xattr module; an IOError while writing is ignored because the
		cache is only an optimisation.
		"""
		if not xattr:
			return
		try:
			self.clear_cache()
			xattr.setxattr(self.name, self._XATTR_PREFIX + 'mtime', str(int(self.mtime)))
			for n in ('ed2k', 'md5', 'sha1', 'crc32'):
				if hasattr(self, n):
					xattr.setxattr(self.name, self._XATTR_PREFIX + n, getattr(self, n))
		except IOError:
			pass

	def clear_cache(self):
		"""Remove every user.pyanidb.* extended attribute from the file.

		Does nothing when the xattr module is unavailable. Errors from the
		xattr calls propagate to the caller.
		"""
		if not xattr:
			return
		for name in xattr.listxattr(self.name):
			if name.startswith(self._XATTR_PREFIX):
				xattr.removexattr(self.name, name)

class Hashthread(threading.Thread):
	"""Worker thread that hashes files taken from a shared list.

	filelist   -- shared list of paths; entries are popped from the front.
	hashlist   -- shared list that receives one File object per path.
	algorithms -- hash names handed to File.
	cache      -- cache flag handed to File.

	Any further positional or keyword arguments go to threading.Thread.
	"""

	def __init__(self, filelist, hashlist, algorithms, cache, *args, **kwargs):
		threading.Thread.__init__(self, *args, **kwargs)
		self.filelist = filelist
		self.hashlist = hashlist
		self.algorithms = algorithms
		self.cache = cache

	def run(self):
		"""Hash files until the shared file list is empty."""
		while True:
			# Only an empty list means "no more work". The handler is kept
			# this narrow so an IndexError raised while hashing is not
			# mistaken for the end of the queue.
			try:
				path = self.filelist.pop(0)
			except IndexError:
				return
			self.hashlist.append(File(path, self.algorithms, self.cache))

def hash_files(files, cache = False, algorithms = ('ed2k',), num_threads = 1):
	"""Hash *files* on worker threads, yielding File objects as they finish.

	files       -- list of paths; the workers pop entries off it, so it is
	               emptied as hashing proceeds.
	cache       -- cache flag passed on to each File.
	algorithms  -- hash names passed on to each File.
	num_threads -- number of Hashthread workers to start.

	This is a generator. Results come out in the order the workers append
	them, which with several threads need not match the input order.
	"""
	hashlist = []
	threads = []
	for _ in range(num_threads):
		thread = Hashthread(files, hashlist, algorithms, cache)
		thread.start()
		threads.append(thread)
	# Liveness must be tested BEFORE the result list. With the opposite
	# order a worker could append its last result and exit between the two
	# checks, and that result would never be yielded.
	while any(thread.is_alive() for thread in threads) or hashlist:
		try:
			result = hashlist.pop(0)
		except IndexError:
			# Workers are still busy; wait a little before polling again.
			time.sleep(0.1)
			continue
		yield result
	# Falling off the end finishes the generator; an explicit
	# "raise StopIteration" is turned into RuntimeError by PEP 479.