diff options
author | Jon Bergli Heier <snakebite@jvnv.net> | 2015-09-27 11:04:27 +0200 |
---|---|---|
committer | Jon Bergli Heier <snakebite@jvnv.net> | 2015-09-27 11:04:27 +0200 |
commit | f30f76acd13b45143adde1a78f185f7d81058628 (patch) | |
tree | d3713f1b3cfb9e0eac4cd18f40c49b1d400a7d91 /modules | |
parent | 09e5bd6053df3c848b1ea853d23aebb145398dac (diff) |
url_titles: Prefer Content-Type from HTTP headers.
Diffstat (limited to 'modules')
-rw-r--r-- | modules/url_titles.py | 23 |
1 file changed, 11 insertions, 12 deletions
diff --git a/modules/url_titles.py b/modules/url_titles.py
index e4bbae6..fc750f8 100644
--- a/modules/url_titles.py
+++ b/modules/url_titles.py
@@ -83,28 +83,27 @@ class Module:
             s.seek(0)
             s = gzip.GzipFile(fileobj = s).read()
             m = self.re_title.search(s)
-            enc = self.re_meta.search(s)
+            meta_enc = self.re_meta.search(s)
         else:
             s = ''
             m = None
-            enc = None
+            meta_enc = None
             buf = u.read(1024)
             while buf and time.time() - t < 5.0:
                 s += buf
                 m = self.re_title.search(s)
-                enc = self.re_meta.search(s)
-                if m and (enc or '</head>' in s):
+                meta_enc = self.re_meta.search(s)
+                if m and (meta_enc or '</head>' in s):
                     break
                 buf = u.read(1024)
-        if enc:
-            enc = enc.groups()[0]
+        ct = u.headers['content-type']
+        enc = ct.lower().split('charset=')
+        if len(enc) == 2:
+            enc = enc[1]
+        elif meta_enc:
+            enc = meta_enc.groups()[0]
         else:
-            ct = u.headers['content-type']
-            enc = ct.lower().split('charset=')
-            if len(enc) == 2:
-                enc = enc[1]
-            else:
-                enc = None
+            enc = None
         if m:
             s = m.groups()[0]