"""Bot module answering "!ep" queries from the "mahou Showtime!" listing."""

# Module metadata; presumably read by the bot's module loader -- confirm.
info = {
    'author': 'Jon Bergli Heier',
    'title': 'mahou Showtime',
    'description': 'Parser for "mahou Showtime!"',
}

import urllib2
import time

from BeautifulSoup import BeautifulSoup


class Module:
    """Keyword handler that searches the Showtime page for a show title.

    The downloaded page is cached on the instance (``cache`` holds the HTML,
    ``lastcache`` the time it was fetched) and is re-downloaded once it is
    at least ``CACHE_TTL`` seconds old.
    """

    # Page that lists the "Currently Airing" and "Starting Soon" tables.
    SHOWTIME_URL = 'http://www.mahou.org/Showtime/'
    # Minimum age, in seconds, before the cached page is fetched again.
    CACHE_TTL = 60

    def __init__(self, bot):
        """Store the bot handle and register the ``!ep`` keyword.

        ``bot`` may be a false value (e.g. ``None``) for stand-alone use, in
        which case no keyword is registered.
        """
        self.irc = bot
        if self.irc:
            self.irc.register_keyword('!ep', self)
        self.cache = ''
        self.lastcache = 0

    def _parse_section(self, soup, summary, eta_column):
        """Return ``(title, channel, airtime, eta)`` tuples for one table.

        ``summary`` is the value of the outer table's ``summary`` attribute
        and ``eta_column`` the index of the cell holding the ETA text.  A
        missing table, or a row that lacks the expected cells, is skipped
        instead of raising, so a layout change on the page degrades to
        fewer results rather than an unhandled exception.
        """
        outer = soup.find('table', attrs={'summary': summary})
        try:
            # The listing rows live in a table nested inside the first cell
            # of the outer table.  Any missing link in this chain is None,
            # and attribute access on None raises AttributeError.
            rows = outer.tr.td.table.findAll('tr')
        except AttributeError:
            return []

        entries = []
        # The first row is the "header" table row, so it is skipped.
        for row in rows[1:]:
            cells = row.findAll('td')
            try:
                entries.append((
                    cells[1].contents[0],
                    cells[3].contents[0],
                    cells[5].contents[0],
                    cells[eta_column].contents[0].strip(),
                ))
            except IndexError:
                # Too few cells, or an empty cell: not a listing row.
                continue
        return entries

    def find_next(self, search):
        """Return a one-line description of the first show matching ``search``.

        ``search`` is split on whitespace; a show matches when every term
        occurs (case-insensitively) in its title.  "Currently Airing" rows
        are examined before "Starting Soon" rows.

        Returns the UTF-8 encoded result line, ``'No match found.'`` when
        nothing matches, or ``'Failed to fetch showtime data.'`` when the
        page could not be downloaded.
        """
        terms = [term.lower() for term in search.split()]

        if time.time() - self.lastcache >= self.CACHE_TTL or not self.cache:
            try:
                response = urllib2.urlopen(self.SHOWTIME_URL)
                try:
                    page = response.read()
                finally:
                    response.close()
            except Exception:
                # Best-effort fetch: report any download error to the user,
                # but let KeyboardInterrupt/SystemExit propagate.
                return 'Failed to fetch showtime data.'
            # missing space here seems to break BeautifulSoup's parsing
            self.cache = page.replace('summary=""border', 'summary="" border')
            self.lastcache = time.time()

        soup = BeautifulSoup(self.cache)
        # The two tables keep the ETA in different columns.
        results = self._parse_section(soup, 'Currently Airing', 6)
        results.extend(self._parse_section(soup, 'Starting Soon', 8))

        for title, channel, airtime, eta in results:
            lowered = title.lower()
            if all(term in lowered for term in terms):
                line = '%s airs on %s on %s (eta: %s)' % (
                    title, channel, airtime, eta)
                return line.encode('utf8')
        return 'No match found.'

    def keyword(self, nick, channel, kw, msg):
        """Handle a ``!ep`` invocation and send the answer via the bot.

        The reply goes to ``channel``, unless ``channel`` equals the bot's
        own nickname (presumably a private message), in which case it goes
        to the part of ``nick`` before the first ``'!'``.  ``kw`` is not
        used.  An empty ``msg`` yields a usage hint.
        """
        if channel == self.irc.nickname:
            target = nick.split('!')[0]
        else:
            target = channel

        args = msg.split()
        if not args:
            self.irc.msg(target, 'Usage: !ep search')
            return
        self.irc.msg(target, self.find_next(' '.join(args)))


if __name__ == '__main__':
    import sys

    # Stand-alone use: search for the command-line arguments without a bot.
    m = Module(None)
    print(m.find_next(' '.join(sys.argv[1:])))