# URL Titles bot module (Python 2: uses urllib2, cStringIO, ConfigParser and the print statement).
#
# NOTE(review): this file has been collapsed onto a single physical line and is not valid Python
# as it stands. In addition, the text between the middle of the `re_title` pattern and the
# fragment "' in s): break" appears to have been lost (presumably stripped as HTML markup).
# The missing span would have held the rest of `re_meta`, any constructor, the header of
# `get_titles`, the code that opens each URL, and the definitions of `format_text`, `u`, `t`,
# `m`, `meta_enc`, `titles`, `buf` and `s` -- none of these are visible here, so they are not
# documented below. Restore the file from version control before changing behaviour.
#
# What the surviving code shows:
#   * `info` holds the module's author/title/description strings and `cfg_section` names the
#     configuration section 'module/url_titles'.
#   * `Module.re_http` matches http:// and https:// URLs up to the first space or '#'.
#   * Title branch: the response is read in 1024-byte chunks. The character set is taken from the
#     text after 'charset=' in the Content-Type header, otherwise from the first group of
#     `meta_enc` when that matched, otherwise left as None. When a title match `m` exists and a
#     character set is known, the title is decoded with it and re-encoded as UTF-8 (errors
#     replaced) before being appended to `titles`.
#   * Image branch: taken only when the Content-Type header is exactly 'image/gif', 'image/png'
#     or 'image/jpeg'. Data is fed to a PIL `ImageFile.Parser` in 1024-byte chunks until the
#     stream ends or 5.0 seconds have elapsed since `t`; the appended entry reads
#     '<format> image: <width>x<height> (<size>)'.
#     NOTE(review): a header carrying extra parameters will not match this exact comparison.
#   * `pretty_size` (nested helper) returns the size rounded to two decimals with the first
#     suffix among B/KiB/MiB/GiB/TiB whose limit is not exceeded.
#     NOTE(review): it falls off the end (returns None) for sizes above 2**50.
#   * NOTE(review): the bare `except: pass` around `p.close()` silently drops every error,
#     including a failure to parse the image.
#   * Result: nothing is returned when `titles` is empty; a single title is passed through
#     `format_text`; several titles are joined as '\002[n]\002 title' entries and stripped
#     (\002 is presumably the IRC bold control character -- confirm).
#   * `privmsg(nick, channel, msg)` calls `get_titles(msg)` and, when it yields something, sends
#     it via `self.irc.msg` to `channel`, or to the part of `nick` before '!' when `channel`
#     equals the bot's own nickname.
#   * Run as a script: reads '~/.fot' with ConfigParser (the parsed config is not used in the
#     visible code), builds `Module(None)` and prints the titles for the command-line arguments.
info = { 'author': 'Jon Bergli Heier', 'title': 'URL Titles', 'description': 'Fetches the title tags off of URLs.', } cfg_section = 'module/url_titles' import re, urllib2, htmlentitydefs, gzip, cStringIO, time from PIL import ImageFile class Module: re_http = re.compile(r'(https?://[^\ #]+)') re_title = re.compile(r']*?>(.*?)', re.S | re.I) re_meta = re.compile(r'' in s): break buf = u.read(1024) ct = u.headers['content-type'] enc = ct.lower().split('charset=') if len(enc) == 2: enc = enc[1] elif meta_enc: enc = meta_enc.groups()[0] else: enc = None if m: s = m.groups()[0] if enc: s = s.decode(enc, 'replace').encode('utf8', 'replace') titles.append(s) elif u.headers['content-type'] in ('image/gif', 'image/png', 'image/jpeg'): def pretty_size(size): suffixes = (('B', 2**10), ('KiB', 2**20), ('MiB', 2**30), ('GiB', 2**40), ('TiB', 2**50)) for suf, lim in suffixes: if size > lim: continue else: return '%s %s' % (str(round(size/float(lim/2**10), 2)), suf) p = ImageFile.Parser() size = 0 while time.time() - t < 5.0: s = u.read(1024) size += len(s) if not s: break p.feed(s) try: im = None im = p.close() titles.append('%s image: %dx%d (%s)' % ((im.format,) + tuple(im.size) + (pretty_size(size),))) except: pass finally: del im u.close() if len(titles) == 0: return elif len(titles) == 1: s = format_text(titles[0]) else: s = '' for i in range(len(titles)): s += '\002[%d]\002 %s ' % (i+1, format_text(titles[i])) return s.strip() def privmsg(self, nick, channel, msg): titles = self.get_titles(msg) if titles: self.irc.msg(channel if not channel == self.irc.nickname else nick.split('!')[0], titles) if __name__ == '__main__': import sys, ConfigParser, os config = ConfigParser.ConfigParser() config.read([os.path.expanduser('~/.fot')]) m = Module(None) print m.get_titles(' '.join(sys.argv[1:]))