summary refs log tree commit diff
path: root/pyanidb/hash.py
blob: 9727ca23f9f639fc1f22819630f0dbb73106c07b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import threading, time, os, hashlib
try:
	import xattr
except ImportError:
	xattr = None

class Hash:
	"""Compute the requested hashes of a file in a single read pass.

	The constructor reads ``filename`` piece by piece and hands every piece
	to the ``update_<algorithm>`` method of each requested algorithm. After
	construction, the method named after the algorithm (e.g. ``ed2k()``)
	returns the hex digest.

	Only ``ed2k`` is implemented here; requesting an algorithm without a
	matching ``update_<algorithm>`` method raises ``AttributeError``.
	"""

	# Size in bytes of the blocks that are MD4-hashed individually for ed2k.
	ED2K_CHUNK_SIZE = 9728000
	# Number of bytes requested from the file per read call.
	READ_SIZE = 131072

	def __init__(self, filename, algorithms):
		"""Read ``filename`` and feed its contents to every algorithm.

		filename   -- path of the file to hash.
		algorithms -- iterable of algorithm names; each name must have an
		              ``update_<name>`` method on this class.

		Raises AttributeError for an unknown algorithm name and whatever
		``open``/``read`` raise for an unreadable file.
		"""
		update_list = [getattr(self, 'update_%s' % a) for a in algorithms]
		self.md4_partial = hashlib.new('md4')
		self.md4_final = hashlib.new('md4')
		self.size_total = 0
		# Binary mode is required: text mode may translate line endings and,
		# on Python 3, yields str objects that hashlib rejects. The with
		# block closes the handle even when reading or hashing fails.
		with open(filename, 'rb') as f:
			data = f.read(self.READ_SIZE)
			while data:
				for u in update_list:
					u(data)
				data = f.read(self.READ_SIZE)
	
	def update_ed2k(self, data):
		"""Feed ``data`` into the ed2k state, splitting it at chunk borders."""
		chunk_size = self.ED2K_CHUNK_SIZE
		pos = 0
		while pos < len(data):
			if self.size_total and not self.size_total % chunk_size:
				# The previous chunk is complete and more data follows:
				# fold its digest into the outer hash and start a new chunk.
				self.md4_final.update(self.md4_partial.digest())
				self.md4_partial = hashlib.new('md4')
			# Take as much as fits into the chunk currently being filled.
			size = min(len(data) - pos, chunk_size - (self.size_total % chunk_size))
			self.md4_partial.update(data[pos:pos + size])
			pos += size
			self.size_total += size
	
	def ed2k(self):
		"""Return the ed2k hex digest of the data seen so far.

		Input of at most one chunk hashes to the plain MD4 of the data.
		Larger input hashes to the MD4 of the concatenated per-chunk MD4
		digests.
		"""
		if self.size_total > self.ED2K_CHUNK_SIZE:
			# Work on a copy so the running state is left untouched and
			# repeated calls keep returning the same digest.
			final = self.md4_final.copy()
			final.update(self.md4_partial.digest())
			return final.hexdigest()
		return self.md4_partial.hexdigest()

class File:
	"""A file on disk together with its hashes.

	Instances carry ``name``, ``size``, ``mtime`` and ``cached`` plus one
	attribute per requested algorithm holding that hash. When the optional
	``xattr`` module is available, hashes are stored in extended attributes
	named ``user.pyanidb.<algorithm>`` next to ``user.pyanidb.mtime``.
	"""

	# Prefix shared by every extended attribute this class reads or writes.
	XATTR_PREFIX = 'user.pyanidb.'

	def __init__(self, name, algorithms, cache):
		"""Collect size, mtime and the requested hashes of ``name``.

		name       -- path of the file.
		algorithms -- iterable of hash names that must end up as attributes.
		cache      -- when true, try the extended-attribute cache first.

		``cached`` ends up True only when every requested hash came from
		the cache; otherwise the file is hashed and the cache rewritten.
		"""
		self.name = name
		self.size = os.path.getsize(name)
		self.mtime = os.path.getmtime(name)
		self.cached = False
		if cache:
			self.read_cache()
		if not all(hasattr(self, a) for a in algorithms):
			self.cached = False
			h = Hash(name, algorithms)
			for a in algorithms:
				setattr(self, a, getattr(h, a)())
			# NOTE(review): the cache is rewritten even when ``cache`` is
			# false -- presumably intentional; confirm against callers.
			self.write_cache()
	
	def read_cache(self):
		"""Load cached hashes from extended attributes if they are current.

		Does nothing when ``xattr`` is unavailable, when the attributes
		cannot be read, or when the stored mtime differs from the file's
		mtime. On success every cached value becomes an instance attribute
		and ``cached`` is set to True.
		"""
		if not xattr:
			return
		prefix = self.XATTR_PREFIX
		try:
			cache = dict([(n[len(prefix):], xattr.getxattr(self.name, n)) for n in xattr.listxattr(self.name) if n.startswith(prefix)])
		except IOError:
			# The cache is best effort, just like in write_cache: an
			# unreadable cache simply means the file gets hashed again.
			return
		if 'mtime' not in cache or str(int(self.mtime)) != cache.pop('mtime'):
			return
		# NOTE(review): every user.pyanidb.* entry becomes an attribute, so
		# an entry called e.g. "name" would overwrite self.name.
		for n, v in cache.items():
			setattr(self, n, v)
		self.cached = True
	
	def write_cache(self):
		"""Replace the cached mtime and hashes; I/O errors are ignored."""
		if not xattr:
			return
		try:
			self.clear_cache()
			xattr.setxattr(self.name, self.XATTR_PREFIX + 'mtime', str(int(self.mtime)))
			for n in ('ed2k', 'md5', 'sha1', 'crc32'):
				if hasattr(self, n):
					xattr.setxattr(self.name, self.XATTR_PREFIX + n, getattr(self, n))
		except IOError:
			# Caching is optional; a file that cannot take attributes is
			# still hashed correctly.
			pass
	
	def clear_cache(self):
		"""Remove every ``user.pyanidb.*`` extended attribute of the file."""
		for name in xattr.listxattr(self.name):
			if name.startswith(self.XATTR_PREFIX):
				xattr.removexattr(self.name, name)

class Hashthread(threading.Thread):
	"""Worker thread that hashes files taken from a shared list.

	Each worker repeatedly removes the first entry of ``filelist``, builds
	a ``File`` for it and appends the result to ``hashlist``. It stops as
	soon as ``filelist`` is empty.
	"""

	def __init__(self, filelist, hashlist, algorithms, cache, *args, **kwargs):
		"""Store the shared lists and hashing options.

		filelist   -- list of paths still to be hashed; entries are popped.
		hashlist   -- list that receives the finished ``File`` objects.
		algorithms -- hash names passed on to ``File``.
		cache      -- cache flag passed on to ``File``.
		Remaining arguments go to ``threading.Thread.__init__``.
		"""
		threading.Thread.__init__(self, *args, **kwargs)
		self.filelist = filelist
		self.hashlist = hashlist
		self.algorithms = algorithms
		self.cache = cache

	def run(self):
		"""Hash files until the shared input list is exhausted."""
		while True:
			# Only the pop is guarded: an IndexError raised while hashing
			# must not be mistaken for "no more work" and silently swallowed.
			try:
				f = self.filelist.pop(0)
			except IndexError:
				return
			self.hashlist.append(File(f, self.algorithms, self.cache))

def hash_files(files, cache = False, algorithms = ('ed2k',), num_threads = 1):
	"""Hash ``files`` on worker threads and yield the results as they finish.

	files       -- list of paths; the workers pop entries from this very
	               list, so it is consumed while the generator runs.
	cache       -- cache flag handed to every worker.
	algorithms  -- hash names handed to every worker.
	num_threads -- number of ``Hashthread`` workers to start.

	Yields the objects the workers append to the shared result list, in
	the order they were appended. Work starts on the first iteration.
	"""
	hashlist = []
	threads = []
	for _ in range(num_threads):
		thread = Hashthread(files, hashlist, algorithms, cache)
		thread.start()
		threads.append(thread)
	while True:
		# Sample liveness BEFORE looking at the results: once no worker is
		# alive nothing can be appended any more, so an empty list seen
		# afterwards really means everything has been delivered.
		running = any(thread.is_alive() for thread in threads)
		if hashlist:
			yield hashlist.pop(0)
		elif running:
			time.sleep(0.1)
		else:
			# A plain return ends the generator; raising StopIteration
			# here turns into RuntimeError under PEP 479.
			return