From 0c4fc1e72770fc78d87891e5c031272fca59e409 Mon Sep 17 00:00:00 2001 From: zyp Date: Tue, 9 May 2006 20:33:27 +0000 Subject: [project @ zyp-20060509203327-c8c93b489da2ea46] [project @ 27] Multihash library rewrite complete. --- multihash/__init__.py | 26 +++++++++------ src/crc32.cpp | 73 ++++++++++++++++++++++------------------ src/crc32.h | 17 +++++++--- src/ed2k.cpp | 74 +++++++++++++++++++++-------------------- src/ed2k.h | 25 ++++++++------ src/hash.cpp | 63 +++++++++++++++++++++++++++++++++++ src/hash.h | 27 +++++++++++++++ src/md5.cpp | 17 ++++++++++ src/md5.h | 20 +++++++++++ src/multihash.cpp | 85 ----------------------------------------------- src/multihash.h | 37 --------------------- src/multihash_wrapper.cpp | 14 -------- src/sha1.cpp | 17 ++++++++++ src/sha1.h | 20 +++++++++++ src/wrapper.cpp | 29 ++++++++++++++++ 15 files changed, 315 insertions(+), 229 deletions(-) create mode 100644 src/hash.cpp create mode 100644 src/hash.h create mode 100644 src/md5.cpp create mode 100644 src/md5.h delete mode 100644 src/multihash.cpp delete mode 100644 src/multihash.h delete mode 100644 src/multihash_wrapper.cpp create mode 100644 src/sha1.cpp create mode 100644 src/sha1.h create mode 100644 src/wrapper.cpp diff --git a/multihash/__init__.py b/multihash/__init__.py index 5bcf4d5..5e30186 100644 --- a/multihash/__init__.py +++ b/multihash/__init__.py @@ -1,11 +1,17 @@ -from _multihash import * +from _multihash import CRC32, Ed2k, MD5, SHA1 -def file_hash(name): - h = Multihash() - f = open(name) - data = f.read(32768) - while data: - h.update(data) - data = f.read(32768) - f.close() - return h \ No newline at end of file +hashes = { + 'crc32': CRC32, + 'ed2k': Ed2k, + 'md5': MD5, + 'sha1': SHA1} + +class Multihash: + def __init__(self, *args): + if not args: + args = hashes.keys() + h = None + for hash in args: + h = hashes[hash](h) + setattr(self, hash, h.digest) + self.update = h.update diff --git a/src/crc32.cpp b/src/crc32.cpp index 55d6dd2..6623166 100644 --- a/src/crc32.cpp +++ b/src/crc32.cpp @@ -1,41 +1,50 @@ #include "crc32.h" -int* CRC32::crc_table; - -int* CRC32::generate_table() { - int crc; - int* table = new int[256]; - for(int i = 0; i < 256; i++) { - crc = i << 24; - for(int j = 0; j < 8; j++) { - if(crc & 0x80000000) { - crc = (crc << 1) ^ 0x04c11db7; - } else { - crc = crc << 1; +namespace Multihash { + int* crc_table = 0; + + void generate_table() { + int crc; + crc_table = new int[256]; + for(int i = 0; i < 256; i++) { + crc = i << 24; + for(int j = 0; j < 8; j++) { + if(crc & 0x80000000) { + crc = (crc << 1) ^ 0x04c11db7; + } else { + crc = crc << 1; + } } + crc_table[i] = crc; } - table[i] = crc; } - return table; -} - -int CRC32::reflect(int data, int bits) { - int x = 0; - for(int i = 0; i < bits; i++) { - x = x << 1; - x |= data & 1; - data = data >> 1; + + int reflect(int data, int bits) { + int x = 0; + for(int i = 0; i < bits; i++) { + x = x << 1; + x |= data & 1; + data = data >> 1; + } + return x; } - return x; -} - -int CRC32::crc32(int crc, const char* data, int length) { - crc = ~reflect(crc, 32); - if(!crc_table) { - crc_table = generate_table(); + + CRC32::CRC32(Hash* n) : Hash(n) { + if(!crc_table) { + generate_table(); + } + crc_ctx = 0; + } + + void CRC32::hash_update(const char* data, int length) { + int crc = ~reflect(crc_ctx, 32); + for (int i = 0; i < length; i++) { + crc = (crc << 8) ^ crc_table[((crc >> 24) ^ reflect(data[i], 8)) & 0xff]; + } + crc_ctx = ~reflect(crc, 32); } - for (int i = 0; i < length; i++) { - crc = (crc << 8) ^ crc_table[((crc >> 24) ^ reflect(data[i], 8)) & 0xff]; + + std::string CRC32::hash_digest() { + return Hex::hex(crc_ctx); } - return ~reflect(crc, 32); } diff --git a/src/crc32.h b/src/crc32.h index 646ccce..c43c417 100644 --- a/src/crc32.h +++ b/src/crc32.h @@ -1,11 +1,18 @@ #ifndef _CRC32_H_ #define _CRC32_H_ -namespace CRC32 { - extern int* crc_table; - int* generate_table(); - int reflect(int data, int bits); - int crc32(int crc, const char* data, int length); +#include "hash.h" + +namespace Multihash { + class CRC32 : public Hash { + private: + int crc_ctx; + protected: + virtual void hash_update(const char* data, int length); + virtual std::string hash_digest(); + public: + CRC32(Hash* n = 0); + }; } #endif // _CRC32_H_ diff --git a/src/ed2k.cpp b/src/ed2k.cpp index 92e7b15..2d4b2f7 100644 --- a/src/ed2k.cpp +++ b/src/ed2k.cpp @@ -1,41 +1,43 @@ #include "ed2k.h" -template -inline T min(T a, T b) { - return (a > b) ? b : a; -} - -Ed2k::Ed2k() { - MD4_Init(&md4_partial); - MD4_Init(&md4_final); - size_total = 0; -} - -void Ed2k::update(const char* data, int length) { - while(length) { - if(!(size_total % (9500 * 1024)) && size_total) { - unsigned char digest[16]; - MD4_Final(digest, &md4_partial); - MD4_Update(&md4_final, digest, 16); - MD4_Init(&md4_partial); +namespace Multihash { + template + inline T min(T a, T b) { + return (a > b) ? b : a; + } + + Ed2k::Ed2k(Hash* n) : Hash(n) { + MD4_Init(&md4_partial); + MD4_Init(&md4_final); + size_total = 0; + } + + void Ed2k::hash_update(const char* data, int length) { + while(length) { + if(!(size_total % (9500 * 1024)) && size_total) { + unsigned char digest[16]; + MD4_Final(digest, &md4_partial); + MD4_Update(&md4_final, digest, 16); + MD4_Init(&md4_partial); + } + int size = min(length, (9500 * 1024) - (size_total % (9500 * 1024))); + MD4_Update(&md4_partial, data, size); + length -= size; + data += size; + size_total += size; + }; + } + + std::string Ed2k::hash_digest() { + char digest[16]; + if(size_total > (9500 * 1024)) { + unsigned char digest_partial[16]; + MD4_Final(digest_partial, &md4_partial); + MD4_Update(&md4_final, digest_partial, 16); + MD4_Final((unsigned char*)digest, &md4_final); + } else { + MD4_Final((unsigned char*)digest, &md4_partial); } - int size = min(length, (9500 * 1024) - (size_total % (9500 * 1024))); - MD4_Update(&md4_partial, data, size); - length -= size; - data += size; - size_total += size; - }; -} - -char* Ed2k::digest() { - char* digest = new char[16]; - if(size_total > (9500 * 1024)) { - unsigned char digest_partial[16]; - MD4_Final(digest_partial, &md4_partial); - MD4_Update(&md4_final, digest_partial, 16); - MD4_Final((unsigned char*)digest, &md4_final); - } else { - MD4_Final((unsigned char*)digest, &md4_partial); + return Hex::hex(digest, 16); } - return digest; } diff --git a/src/ed2k.h b/src/ed2k.h index 1be7302..52a4dc5 100644 --- a/src/ed2k.h +++ b/src/ed2k.h @@ -1,17 +1,22 @@ #ifndef _ED2K_H_ #define _ED2K_H_ +#include "hash.h" + #include -class Ed2k { - private: - MD4_CTX md4_partial; - MD4_CTX md4_final; - unsigned int size_total; - public: - Ed2k(); - void update(const char* data, int length); - char* digest(); -}; +namespace Multihash { + class Ed2k : public Hash { + private: + MD4_CTX md4_partial; + MD4_CTX md4_final; + unsigned int size_total; + protected: + virtual void hash_update(const char* data, int length); + virtual std::string hash_digest(); + public: + Ed2k(Hash* n = 0); + }; +} #endif // _ED2K_H_ diff --git a/src/hash.cpp b/src/hash.cpp new file mode 100644 index 0000000..89b14cc --- /dev/null +++ b/src/hash.cpp @@ -0,0 +1,63 @@ +#include "hash.h" +#include "crc32.h" + +#include +#include + +namespace Multihash { + namespace Hex { + static char* digits = "0123456789abcdef"; + std::string hex(char* bin, int length) { + std::string s(length * 2, ' '); + for(int i = 0; i < length; i++) { + s[i*2] = digits[(bin[i] >> 4) & 0xf]; + s[i*2+1] = digits[bin[i] & 0xf]; + } + return s; + } + std::string hex(int bin) { + std::string s(sizeof(int) * 2, ' '); + for(int i = 0; i < sizeof(int) * 2; i++) { + s[sizeof(int) * 2 - 1 - i] = digits[bin & 0xf]; + bin = bin >> 4; + } + return s; + } + } + + Hash::Hash(Hash* n) { + digest_str = ""; + next = n; + } + + void Hash::update(std::string data) { + const char* buf = data.c_str(); + int len = data.length(); + this->_update(buf, len); + } + + void Hash::_update(const char* data, int length) { + if(digest_str.length()) { + throw std::runtime_error("Can't update after digest."); + } + this->hash_update(data, length); + if(next) { + next->_update(data, length); + } + } + + std::string Hash::digest() { + if(!digest_str.length()) { + digest_str = this->hash_digest(); + } + return digest_str; + } + + void Hash::hash_update(const char* data, int length) { + throw std::runtime_error("Not implemented."); + } + + std::string Hash::hash_digest() { + throw std::runtime_error("Not implemented."); + } +} diff --git a/src/hash.h b/src/hash.h new file mode 100644 index 0000000..d2771a5 --- /dev/null +++ b/src/hash.h @@ -0,0 +1,27 @@ +#ifndef _HASH_H_ +#define _HASH_H_ + +#include + +namespace Multihash { + namespace Hex { + std::string hex(char* bin, int length); + std::string hex(int bin); + } + + class Hash { + private: + std::string digest_str; + Hash* next; + void _update(const char* data, int length); + protected: + virtual void hash_update(const char* data, int length); + virtual std::string hash_digest(); + public: + Hash(Hash* n = 0); + void update(std::string data); + std::string digest(); + }; +} + +#endif // _HASH_H_ diff --git a/src/md5.cpp b/src/md5.cpp new file mode 100644 index 0000000..4da0152 --- /dev/null +++ b/src/md5.cpp @@ -0,0 +1,17 @@ +#include "md5.h" + +namespace Multihash { + MD5::MD5(Hash* n) : Hash(n) { + MD5_Init(&md5_ctx); + } + + void MD5::hash_update(const char* data, int length) { + MD5_Update(&md5_ctx, data, length); + } + + std::string MD5::hash_digest() { + char digest[16]; + MD5_Final((unsigned char*)digest, &md5_ctx); + return Hex::hex(digest, 16); + } +} diff --git a/src/md5.h b/src/md5.h new file mode 100644 index 0000000..f94f5a6 --- /dev/null +++ b/src/md5.h @@ -0,0 +1,20 @@ +#ifndef _MD5_H_ +#define _MD5_H_ + +#include "hash.h" + +#include + +namespace Multihash { + class MD5 : public Hash { + private: + MD5_CTX md5_ctx; + protected: + virtual void hash_update(const char* data, int length); + virtual std::string hash_digest(); + public: + MD5(Hash* n = 0); + }; +} + +#endif // _MD5_H_ diff --git a/src/multihash.cpp b/src/multihash.cpp deleted file mode 100644 index 64dff76..0000000 --- a/src/multihash.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include "multihash.h" -#include "crc32.h" - -#include - -namespace Hex { - static char* digits = "0123456789abcdef"; - std::string hex(char* bin, int length) { - std::string s(length * 2, ' '); - for(int i = 0; i < length; i++) { - s[i*2] = digits[(bin[i] >> 4) & 0xf]; - s[i*2+1] = digits[bin[i] & 0xf]; - } - return s; - } - std::string hex(int bin) { - std::string s(sizeof(int) * 2, ' '); - for(int i = 0; i < sizeof(int) * 2; i++) { - s[sizeof(int) * 2 - 1 - i] = digits[bin & 0xf]; - bin = bin >> 4; - } - return s; - } -} - -Multihash::Multihash() { - finished = false; - - crc32_ctx = 0; - crc32_str = ""; - - ed2k_str = ""; - - MD5_Init(&md5_ctx); - md5_str = ""; - - SHA1_Init(&sha1_ctx); - sha1_str = ""; -} - -void Multihash::update(std::string data) { - if(finished) { - throw std::runtime_error("Can't update after digest."); - } - crc32_ctx = CRC32::crc32(crc32_ctx, data.c_str(), data.length()); - ed2k_ctx.update(data.c_str(), data.length()); - MD5_Update(&md5_ctx, data.c_str(), data.length()); - SHA1_Update(&sha1_ctx, data.c_str(), data.length()); -} - -std::string Multihash::crc32() { - return Hex::hex(crc32_ctx); -} - -std::string Multihash::ed2k() { - if(!ed2k_str.length()) { - finished = true; - char* digest = ed2k_ctx.digest(); - ed2k_str = Hex::hex(digest, 16); - delete digest; - } - return ed2k_str; -} - -std::string Multihash::md5() { - if(!md5_str.length()) { - finished = true; - char* digest = new char[16]; - MD5_Final((unsigned char*)digest, &md5_ctx); - md5_str = Hex::hex(digest, 16); - delete digest; - } - return md5_str; -} - -std::string Multihash::sha1() { - if(!sha1_str.length()) { - finished = true; - char* digest = new char[20]; - SHA1_Final((unsigned char*)digest, &sha1_ctx); - sha1_str = Hex::hex(digest, 20); - delete digest; - } - return sha1_str; -} diff --git a/src/multihash.h b/src/multihash.h deleted file mode 100644 index 5af339f..0000000 --- a/src/multihash.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef _MULTIHASH_H_ -#define _MULTIHASH_H_ - -#include - -#include "ed2k.h" - -#include -#include -#include - -class Multihash { - private: - bool finished; - - int crc32_ctx; - std::string crc32_str; - - Ed2k ed2k_ctx; - std::string ed2k_str; - - MD5_CTX md5_ctx; - std::string md5_str; - - SHA_CTX sha1_ctx; - std::string sha1_str; - - public: - Multihash(); - void update(std::string data); - std::string crc32(); - std::string ed2k(); - std::string md5(); - std::string sha1(); -}; - -#endif // _HASH_H_ diff --git a/src/multihash_wrapper.cpp b/src/multihash_wrapper.cpp deleted file mode 100644 index aa6f615..0000000 --- a/src/multihash_wrapper.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include "multihash.h" - -#include -using namespace boost::python; - -BOOST_PYTHON_MODULE(_multihash) -{ - class_("Multihash") - .def("update", &Multihash::update) - .def("crc32", &Multihash::crc32) - .def("ed2k", &Multihash::ed2k) - .def("md5", &Multihash::md5) - .def("sha1", &Multihash::sha1); -} diff --git a/src/sha1.cpp b/src/sha1.cpp new file mode 100644 index 0000000..1d99abd --- /dev/null +++ b/src/sha1.cpp @@ -0,0 +1,17 @@ +#include "sha1.h" + +namespace Multihash { + SHA1::SHA1(Hash* n) : Hash(n) { + SHA1_Init(&sha1_ctx); + } + + void SHA1::hash_update(const char* data, int length) { + SHA1_Update(&sha1_ctx, data, length); + } + + std::string SHA1::hash_digest() { + char digest[20]; + SHA1_Final((unsigned char*)digest, &sha1_ctx); + return Hex::hex(digest, 20); + } +} diff --git a/src/sha1.h b/src/sha1.h new file mode 100644 index 0000000..011c2f8 --- /dev/null +++ b/src/sha1.h @@ -0,0 +1,20 @@ +#ifndef _SHA1_H_ +#define _SHA1_H_ + +#include "hash.h" + +#include + +namespace Multihash { + class SHA1 : public Hash { + private: + SHA_CTX sha1_ctx; + protected: + virtual void hash_update(const char* data, int length); + virtual std::string hash_digest(); + public: + SHA1(Hash* n = 0); + }; +} + +#endif // _SHA1_H_ diff --git a/src/wrapper.cpp b/src/wrapper.cpp new file mode 100644 index 0000000..cbe86f5 --- /dev/null +++ b/src/wrapper.cpp @@ -0,0 +1,29 @@ +#include "hash.h" +#include "ed2k.h" +#include "crc32.h" +#include "md5.h" +#include "sha1.h" +using namespace Multihash; + +#include +using namespace boost::python; + +BOOST_PYTHON_MODULE(_multihash) +{ + class_("Hash") + .def(init >()) + .def("update", &Hash::update) + .def("digest", &Hash::digest); + + class_ >("Ed2k") + .def(init >()); + + class_ >("CRC32") + .def(init >()); + + class_ >("MD5") + .def(init >()); + + class_ >("SHA1") + .def(init >()); +} -- cgit v1.2.3