1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
import threading, time, os, hashlib, binascii
try:
import xattr
except ImportError:
xattr = None
class Ed2k:
    """Incremental ed2k hash (eMule/AniDB style).

    Input is split into 9,728,000-byte chunks; each chunk is MD4-hashed,
    and for input larger than one chunk the final hash is the MD4 of the
    concatenated per-chunk digests.

    NOTE(review): relies on hashlib providing 'md4'; modern OpenSSL builds
    may ship it only in the legacy provider -- confirm availability.
    """
    def __init__(self):
        # MD4 of the current, possibly incomplete, chunk.
        self.md4_partial = hashlib.new('md4')
        # MD4 over the digests of all completed chunks.
        self.md4_final = hashlib.new('md4')
        # Total number of bytes fed in so far.
        self.size_total = 0
    def update(self, data):
        """Feed a buffer into the hash, splitting it at chunk boundaries."""
        pos = 0
        while pos < len(data):
            # Exactly at a chunk boundary (and not at the very start):
            # fold the finished chunk's digest into the outer hash and
            # start a fresh per-chunk hasher.
            if (not (self.size_total % 9728000)) and self.size_total:
                self.md4_final.update(self.md4_partial.digest())
                self.md4_partial = hashlib.new('md4')
            # Consume up to the end of this buffer or the end of the
            # current chunk, whichever comes first.
            size = min(len(data) - pos, 9728000 - (self.size_total % 9728000))
            self.md4_partial.update(data[pos:pos + size])
            pos += size
            self.size_total += size
    def hexdigest(self):
        """Return the ed2k hash as a hex string.

        NOTE(review): for multi-chunk input this folds the pending partial
        digest into md4_final, so a second call would fold it again --
        call it only once per hashed stream.
        """
        if self.size_total > 9728000:
            # Multi-chunk input: finish the last chunk, hash the digests.
            self.md4_final.update(self.md4_partial.digest())
            return self.md4_final.hexdigest()
        # Input fits in a single chunk: its MD4 is the ed2k hash.
        return self.md4_partial.hexdigest()
class Crc32:
    """CRC-32 checksum exposing the same update()/hexdigest() interface
    as the hashlib-style hashers used elsewhere in this module."""

    def __init__(self):
        # Running CRC value; binascii.crc32 threads it through each update.
        self.s = 0

    def update(self, data):
        """Fold another buffer into the running checksum."""
        self.s = binascii.crc32(data, self.s)

    def hexdigest(self):
        """Return the checksum as eight lowercase hex digits."""
        # Mask to 32 bits: older Pythons may return a signed value.
        return format(self.s & 0xffffffff, '08x')
# Algorithm name -> zero-argument factory producing a hasher with an
# update()/hexdigest() interface.
hasher_obj = {
    'ed2k': Ed2k,
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'crc32': Crc32,
}
class Hash:
    """Hash a file with several algorithms in a single pass over the data.

    For each name in *algorithms*, a hasher is built from hasher_obj and its
    bound hexdigest callable is exposed as an attribute of the same name
    (e.g. h.md5()), so digests are read lazily after construction.
    """
    def __init__(self, filename, algorithms):
        update_list = []
        for a in algorithms:
            h = hasher_obj[a]()
            update_list.append(h.update)
            # Expose the bound hexdigest method under the algorithm's name.
            setattr(self, a, h.hexdigest)
        # Open in binary mode -- hashes are defined over raw bytes, and text
        # mode would corrupt or re-decode the data -- and use a context
        # manager so the handle is closed even if a hasher raises.
        with open(filename, 'rb') as f:
            data = f.read(131072)
            while data:
                for u in update_list:
                    u(data)
                data = f.read(131072)
class File:
    """A file on disk together with its hash digests.

    Digests are optionally cached in extended attributes (user.pyanidb.*),
    keyed by the file's mtime, so unchanged files are not re-hashed.
    Digest values are exposed as attributes named after the algorithm.
    """
    def __init__(self, name, algorithms, cache):
        self.name = name
        self.size = os.path.getsize(name)
        self.mtime = os.path.getmtime(name)
        self.cached = False
        if cache:
            self.read_cache()
        # Recompute if the cache was cold or lacks any requested digest.
        if not all(hasattr(self, a) for a in algorithms):
            self.cached = False
            h = Hash(name, algorithms)
            for a in algorithms:
                setattr(self, a, getattr(h, a)())
            self.write_cache()
    def read_cache(self):
        """Load digests from user.pyanidb.* xattrs if the mtime matches."""
        if not xattr:
            return
        # Strip the 'user.pyanidb.' prefix (13 characters) from each name.
        cache = dict((n[13:], xattr.getxattr(self.name, n)) for n in xattr.listxattr(self.name) if n.startswith('user.pyanidb.'))
        # A cache without a matching mtime is stale; ignore it entirely.
        if 'mtime' not in cache or str(int(self.mtime)) != cache.pop('mtime'):
            return
        for n, v in cache.iteritems():
            setattr(self, n, v)
        self.cached = True
    def write_cache(self):
        """Persist mtime and any computed digests to xattrs (best effort)."""
        if not xattr:
            return
        try:
            self.clear_cache()
            xattr.setxattr(self.name, 'user.pyanidb.mtime', str(int(self.mtime)))
            for n in ('ed2k', 'md5', 'sha1', 'crc32'):
                if hasattr(self, n):
                    xattr.setxattr(self.name, 'user.pyanidb.' + n, getattr(self, n))
        except IOError:
            # Filesystem without xattr support (or read-only): caching is
            # purely an optimization, so fail silently.
            pass
    def clear_cache(self):
        """Remove every user.pyanidb.* xattr from the file."""
        # Guard like the other cache methods so a direct call is a no-op
        # when the xattr module is unavailable.
        if not xattr:
            return
        for name in xattr.listxattr(self.name):
            if name.startswith('user.pyanidb.'):
                xattr.removexattr(self.name, name)
class Hashthread(threading.Thread):
    """Worker thread: drains a shared list of filenames, appending a hashed
    File object for each onto a shared result list."""

    def __init__(self, filelist, hashlist, algorithms, cache, *args, **kwargs):
        threading.Thread.__init__(self, *args, **kwargs)
        self.filelist = filelist
        self.hashlist = hashlist
        self.algorithms = algorithms
        self.cache = cache

    def run(self):
        # list.pop(0) is atomic under the GIL, so concurrent workers never
        # grab the same filename; popping an empty list raises IndexError,
        # which signals that the work list is drained.
        try:
            while True:
                name = self.filelist.pop(0)
                self.hashlist.append(File(name, self.algorithms, self.cache))
        except IndexError:
            return
def hash_files(files, cache = False, algorithms = ('ed2k',), num_threads = 1):
    """Yield File objects for *files* as worker threads finish hashing them.

    *files* is consumed in place (workers pop filenames from it), and
    results are yielded in completion order, not input order.
    """
    hashlist = []
    threads = []
    for x in xrange(num_threads):
        thread = Hashthread(files, hashlist, algorithms, cache)
        thread.start()
        threads.append(thread)
    # Drain until every worker has exited and no results remain queued;
    # any() short-circuits instead of summing all liveness flags.
    while hashlist or any(thread.isAlive() for thread in threads):
        try:
            yield hashlist.pop(0)
        except IndexError:
            # Workers still running but nothing ready yet; back off briefly.
            time.sleep(0.1)
    # No explicit 'raise StopIteration': falling off the end of a generator
    # already stops it, and under PEP 479 (Python 3.7+) an explicit raise
    # would surface to callers as a RuntimeError.
|