diff options
author | Jon Bergli Heier <snakebite@jvnv.net> | 2010-06-09 01:10:24 +0200 |
---|---|---|
committer | Jon Bergli Heier <snakebite@jvnv.net> | 2010-06-09 01:10:24 +0200 |
commit | 1b890679774d64ff83b12fe5e3bf44401d3b2467 (patch) | |
tree | df4eab6100cb57e3796a2b5dfa15e0bbf1c0a86e | |
parent | 7cec7c641da41c3c456fcaab07046b84d49566ef (diff) |
modules: Added a TVRage.com parser along with an RFC 3339 parser and formatter.
-rw-r--r-- | modules/tvrage.py | 79 | ||||
-rw-r--r-- | rfc3339.py | 296 |
2 files changed, 375 insertions, 0 deletions
# ---- modules/tvrage.py (new file) ----
info = {
    'author': 'Jon Bergli Heier',
    'title': 'TVRage',
    'description': 'TVRage.com feed parser',
}

import urllib, urllib2, datetime, rfc3339, pytz
from lxml import etree


class Module:
    """Bot module that answers the ``!tv`` keyword with TVRage air dates.

    The module queries the TVRage "quickinfo" service for a show name and
    formats a one-line reply (using the ``\\002`` bold control code) that says
    when the show aired or will air.
    """

    # printf-style template; the single %s receives the url-encoded query.
    QUICKINFO_URL = 'http://services.tvrage.com/tools/quickinfo.php?%s'

    def __init__(self, bot):
        """Remember the bot and register the ``!tv`` keyword with it.

        bot -> object providing register_keyword(), msg() and nickname, or
               a false value (e.g. None) when run from the command line.
        """
        self.irc = bot
        if self.irc:
            self.irc.register_keyword('!tv', self)

    def _fetch_quickinfo(self, search):
        """Return the raw quickinfo response for *search*, or None on failure."""
        query = urllib.urlencode({'show': search})
        try:
            response = urllib2.urlopen(self.QUICKINFO_URL % query)
        except Exception:
            # Deliberately broad (network, HTTP and protocol errors), but
            # unlike a bare except it lets KeyboardInterrupt/SystemExit through.
            return None
        try:
            return response.read()
        except Exception:
            return None
        finally:
            response.close()

    def _parse_quickinfo(self, rawdata):
        """Parse ``key@value^value...`` lines into a dict of value lists."""
        data = {}
        for line in rawdata.strip().split('\n'):
            # partition() tolerates '@' inside the value; lines without any
            # '@' (blank lines, stray markup) are skipped instead of raising.
            key, sep, values = line.partition('@')
            if not sep:
                continue
            data[key] = values.split('^')
        return data

    def find_show(self, search):
        """Look up *search* on TVRage and return a one-line status message.

        Returns an error message (never raises) when the service cannot be
        reached or the response lacks the fields needed to build an answer.
        """
        rawdata = self._fetch_quickinfo(search)
        if rawdata is None:
            return 'Could not fetch show data from TVRage.'

        if rawdata.startswith('No Show Results'):
            return rawdata.strip()

        # Why is there a pre-tag here in the first place?
        if rawdata.startswith('<pre>'):
            rawdata = rawdata[5:]

        data = self._parse_quickinfo(rawdata)
        if 'Show Name' not in data or 'Status' not in data:
            return 'Could not parse show data from TVRage.'

        name = data['Show Name'][0]
        status = data['Status'][0]
        if 'Ended' in status or 'Canceled' in status:
            return '\002%s\002 does not currently air.' % name

        if 'RFC3339' not in data:
            # NOTE(review): presumably the service omits this field when no
            # upcoming episode is known -- confirm against the TVRage output.
            return '\002%s\002 has no known air date.' % name

        # TODO: Fetch this from somewhere user-configurable
        local_tz = pytz.timezone('Europe/Oslo')

        airdate = data['RFC3339'][0]
        if len(airdate) == 24:  # Assume missing 0 in timezone
            airdate = airdate[:20] + '0' + airdate[20:]
        try:
            airdate = rfc3339.parse_datetime(airdate)
        except ValueError:
            return 'Could not parse the air date for \002%s\002.' % name

        # Convert airdate to our local timezone
        airdate = airdate.astimezone(local_tz)

        # Localize utcnow() as UTC
        now = pytz.utc.localize(datetime.datetime.utcnow())

        eta = airdate - now
        # Get rid of microseconds
        eta = datetime.timedelta(eta.days, eta.seconds)

        when = airdate.strftime('%d.%m.%Y %H:%M %Z')

        # A normalized timedelta is negative exactly when .days < 0.
        if eta < datetime.timedelta(0):
            return '\002%s\002 aired on \002%s\002' % (name, when)

        title = '\002%s\002' % name
        next_episode = data.get('Next Episode', [])
        if len(next_episode) > 1:
            # The second '^'-separated value is what the reply shows.
            title = '%s %s' % (title, next_episode[1])
        return '%s airs on \002%s\002 (eta: %s)' % (title, when, eta)

    def keyword(self, nick, channel, kw, msg):
        """Handle a ``!tv`` invocation and send the answer to the right target.

        Replies to the channel, or to the sender's nick when the message was
        addressed to the bot itself (channel == bot nickname).
        """
        if channel != self.irc.nickname:
            target = channel
        else:
            target = nick.split('!')[0]

        args = msg.split()
        if not args:
            self.irc.msg(target, 'Usage: !tv search')
            return

        # NOTE(review): find_show() performs a blocking HTTP request; confirm
        # that is acceptable for the bot's event loop.
        self.irc.msg(target, self.find_show(' '.join(args)))


if __name__ == '__main__':
    import sys
    m = Module(None)
    # Parenthesised single argument prints identically on Python 2 and 3.
    print(m.find_show(' '.join(sys.argv[1:])))

# ---- rfc3339.py (new file) ----
# rfc3339.py -- Implementation of the majority of RFC 3339 for python.
# Copyright (c) 2008, 2009, 2010 LShift Ltd.
# <query@lshift.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
Implementation of the majority of http://www.ietf.org/rfc/rfc3339.txt.

Use datetime.datetime.isoformat() as an inverse of the various parsing
routines in this module.

Limitations, with respect to RFC 3339:

  - Section 4.3, "Unknown Local Offset Convention", is not implemented.

  - Section 5.6, "Internet Date/Time Format", is the ONLY supported format
    implemented by the various parsers in this module. (Section 5.6 is
    reproduced in its entirety below.)

  - Section 5.7, "Restrictions", is left to the datetime.datetime constructor
    to implement, with the exception of limits on timezone
    minutes-east-of-UTC magnitude. In particular, leap seconds are not
    addressed by this module. (And it appears that they are not supported
    by datetime, either.)

Potential Improvements:

  - Support for leap seconds. (There's a table of them in RFC 3339 itself,
    and http://tf.nist.gov/pubs/bulletin/leapsecond.htm updates monthly.)

Here's an excerpt from RFC 3339 itself:

5.6. Internet Date/Time Format

   The following profile of ISO 8601 [ISO8601] dates SHOULD be used in
   new protocols on the Internet. This is specified using the syntax
   description notation defined in [ABNF].

   date-fullyear   = 4DIGIT
   date-month      = 2DIGIT  ; 01-12
   date-mday       = 2DIGIT  ; 01-28, 01-29, 01-30, 01-31 based on
                             ; month/year
   time-hour       = 2DIGIT  ; 00-23
   time-minute     = 2DIGIT  ; 00-59
   time-second     = 2DIGIT  ; 00-58, 00-59, 00-60 based on leap second
                             ; rules
   time-secfrac    = "." 1*DIGIT
   time-numoffset  = ("+" / "-") time-hour ":" time-minute
   time-offset     = "Z" / time-numoffset

   partial-time    = time-hour ":" time-minute ":" time-second
                     [time-secfrac]
   full-date       = date-fullyear "-" date-month "-" date-mday
   full-time       = partial-time time-offset

   date-time       = full-date "T" full-time

      NOTE: Per [ABNF] and ISO8601, the "T" and "Z" characters in this
      syntax may alternatively be lower case "t" or "z" respectively.

      This date/time format may be used in some environments or contexts
      that distinguish between the upper- and lower-case letters 'A'-'Z'
      and 'a'-'z' (e.g. XML). Specifications that use this format in
      such environments MAY further limit the date/time syntax so that
      the letters 'T' and 'Z' used in the date/time syntax must always
      be upper case. Applications that generate this format SHOULD use
      upper case letters.

      NOTE: ISO 8601 defines date and time separated by "T".
      Applications using this syntax may choose, for the sake of
      readability, to specify a full-date and full-time separated by
      (say) a space character.
"""

import datetime, time, calendar
import re

__all__ = ["tzinfo", "UTC_TZ", "parse_date", "parse_datetime", "now", "utcfromtimestamp", "utctotimestamp", "datetimetostr", "timestamptostr", "strtotimestamp"]

ZERO = datetime.timedelta(0)

class tzinfo(datetime.tzinfo):
    """
    Implementation of a fixed-offset tzinfo.
    """
    def __init__(self, minutesEast=0, name='Z'):
        """
        minutesEast -> number of minutes east of UTC that this tzinfo represents.
        name -> symbolic (but uninterpreted) name of this tzinfo.

        Both arguments default to the values used for UTC_TZ.  The defaults
        exist because copying/unpickling a datetime.tzinfo subclass first
        calls the class with no arguments and then restores the instance
        attributes.
        """
        self.minutesEast = minutesEast
        self.offset = datetime.timedelta(minutes = minutesEast)
        self.name = name

    def utcoffset(self, dt):
        """Returns minutesEast from the constructor, as a datetime.timedelta."""
        return self.offset

    def dst(self, dt):
        """This is a fixed offset tzinfo, so always returns a zero timedelta."""
        return ZERO

    def tzname(self, dt):
        """Returns the name from the constructor."""
        return self.name

    def __repr__(self):
        """If minutesEast==0, prints specially as rfc3339.UTC_TZ."""
        if self.minutesEast == 0:
            return "rfc3339.UTC_TZ"
        else:
            return "rfc3339.tzinfo(%s,%s)" % (self.minutesEast, repr(self.name))

UTC_TZ = tzinfo(0, 'Z')

date_re_str = r'(\d\d\d\d)-(\d\d)-(\d\d)'
time_re_str = r'(\d\d):(\d\d):(\d\d)(\.(\d+))?([zZ]|(([-+])(\d\d):(\d\d)))'

def make_re(*parts):
    """Compile the concatenated parts into a regex that must match the whole
    string, ignoring leading and trailing whitespace."""
    return re.compile(r'^\s*' + ''.join(parts) + r'\s*$')

date_re = make_re(date_re_str)
datetime_re = make_re(date_re_str, r'[ tT]', time_re_str)

def parse_date(s):
    """
    Given a string matching the 'full-date' production above, returns
    a datetime.date instance. Any deviation from the allowed format
    will produce a raised ValueError.

    >>> parse_date("2008-08-24")
    datetime.date(2008, 8, 24)
    >>> parse_date(" 2008-08-24 ")
    datetime.date(2008, 8, 24)
    >>> parse_date("2008-08-00")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "rfc3339.py", line 134, in parse_date
        return datetime.date(int(y), int(m), int(d))
    ValueError: day is out of range for month
    >>> parse_date("2008-06-31")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "rfc3339.py", line 134, in parse_date
        return datetime.date(int(y), int(m), int(d))
    ValueError: day is out of range for month
    >>> parse_date("2008-13-01")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "rfc3339.py", line 134, in parse_date
        return datetime.date(int(y), int(m), int(d))
    ValueError: month must be in 1..12
    >>> parse_date("22008-01-01")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "rfc3339.py", line 136, in parse_date
        raise ValueError('Invalid RFC 3339 date string', s)
    ValueError: ('Invalid RFC 3339 date string', '22008-01-01')
    >>> parse_date("2008-08-24").isoformat()
    '2008-08-24'
    """
    match = date_re.match(s)
    if not match:
        raise ValueError('Invalid RFC 3339 date string', s)
    (y, m, d) = match.groups()
    # Range checking of the individual fields is left to datetime.date.
    return datetime.date(int(y), int(m), int(d))

def parse_datetime(s):
    """
    Given a string matching the 'date-time' production above, returns
    a datetime.datetime instance. Any deviation from the allowed
    format will produce a raised ValueError.

    A numeric offset whose hour exceeds 23 or whose minute exceeds 59 is
    rejected with ValueError.  A zero offset ("Z", "+00:00", "-00:00")
    yields rfc3339.UTC_TZ.  Fractional seconds are rounded to the nearest
    microsecond; a fraction that rounds up to a whole second is carried
    into the seconds field.

    >>> parse_datetime("2008-08-24T00:00:00Z")
    datetime.datetime(2008, 8, 24, 0, 0, tzinfo=rfc3339.UTC_TZ)
    >>> parse_datetime(" 2008-08-24T00:00:00Z ")
    datetime.datetime(2008, 8, 24, 0, 0, tzinfo=rfc3339.UTC_TZ)
    >>> parse_datetime("2008-08-24T00:00:00")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "rfc3339.py", line 208, in parse_datetime
        raise ValueError('Invalid RFC 3339 datetime string', s)
    ValueError: ('Invalid RFC 3339 datetime string', '2008-08-24T00:00:00')
    >>> parse_datetime("2008-08-24T00:00:00+00:00")
    datetime.datetime(2008, 8, 24, 0, 0, tzinfo=rfc3339.UTC_TZ)
    >>> parse_datetime("2008-08-24T00:00:00+01:00")
    datetime.datetime(2008, 8, 24, 0, 0, tzinfo=rfc3339.tzinfo(60,'+01:00'))
    >>> parse_datetime("2008-08-24T00:00:00-01:00")
    datetime.datetime(2008, 8, 24, 0, 0, tzinfo=rfc3339.tzinfo(-60,'-01:00'))
    >>> parse_datetime("2008-08-24T24:00:00Z")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "rfc3339.py", line 206, in parse_datetime
        tz)
    ValueError: hour must be in 0..23
    >>> midnightUTC = parse_datetime("2008-08-24T00:00:00Z")
    >>> oneamBST = parse_datetime("2008-08-24T01:00:00+01:00")
    >>> midnightUTC == oneamBST
    True
    >>> elevenpmUTC = parse_datetime("2008-08-23T23:00:00Z")
    >>> midnightBST = parse_datetime("2008-08-24T00:00:00+01:00")
    >>> midnightBST == elevenpmUTC
    True
    >>> elevenpmUTC.isoformat()
    '2008-08-23T23:00:00+00:00'
    >>> oneamBST.isoformat()
    '2008-08-24T01:00:00+01:00'
    >>> parse_datetime("2008-08-24T00:00:00.123Z").isoformat()
    '2008-08-24T00:00:00.123000+00:00'
    """
    match = datetime_re.match(s)
    if not match:
        raise ValueError('Invalid RFC 3339 datetime string', s)

    # ignore1/ignore2 are the wrapper groups around the fraction and the
    # numeric offset; only their inner groups are needed.
    (y, m, d, hour, minute, sec, ignore1, frac_sec, wholetz, ignore2, tzsign, tzhour, tzmin) = \
        match.groups()

    if frac_sec:
        microsec = int((float("0." + frac_sec) * 1000000) + 0.5)
    else:
        microsec = 0

    # Rounding e.g. ".9999999" yields 1000000, which datetime rejects as a
    # microsecond value; remember to carry it into the seconds instead.
    carry = microsec >= 1000000
    if carry:
        microsec -= 1000000

    if wholetz == 'z' or wholetz == 'Z':
        tz = UTC_TZ
    else:
        tzhour = int(tzhour)
        tzmin = int(tzmin)
        # time-hour is 00-23 and time-minute is 00-59, which also keeps the
        # magnitude strictly below one day as datetime.tzinfo requires.
        if tzhour > 23 or tzmin > 59:
            raise ValueError('Invalid timezone offset', s, wholetz)
        offset = tzhour * 60 + tzmin
        if offset == 0:
            tz = UTC_TZ
        else:
            if tzsign == '-':
                offset = -offset
            tz = tzinfo(offset, wholetz)

    result = datetime.datetime(int(y), int(m), int(d),
                               int(hour), int(minute), int(sec), microsec,
                               tz)
    if carry:
        try:
            result = result + datetime.timedelta(seconds = 1)
        except OverflowError:
            # Only reachable at the very end of datetime's supported range.
            raise ValueError('Invalid RFC 3339 datetime string', s)
    return result

def now():
    """Return a timezone-aware datetime.datetime object in
    rfc3339.UTC_TZ timezone, representing the current moment
    (time.time()). Useful as a replacement for the (timezone-unaware)
    datetime.datetime.now() method."""
    return utcfromtimestamp(time.time())

def utcfromtimestamp(unix_epoch_timestamp):
    """Interprets its argument as a count of seconds elapsed since the
    Unix epoch, and returns a datetime.datetime in rfc3339.UTC_TZ
    timezone. Any fractional part of the timestamp is discarded (the
    microsecond field is always 0)."""
    (y, m, d, hour, minute, sec) = time.gmtime(unix_epoch_timestamp)[:6]
    return datetime.datetime(y, m, d, hour, minute, sec, 0, UTC_TZ)

def utctotimestamp(dt):
    """Returns a count of the elapsed seconds between the Unix epoch
    and the passed-in datetime.datetime object."""
    return calendar.timegm(dt.utctimetuple())

def datetimetostr(dt):
    """Return a RFC3339 date-time string corresponding to the given
    datetime object. A datetime without a UTC offset gets a "Z" suffix
    appended to its isoformat()."""
    if dt.utcoffset() is not None:
        return dt.isoformat()
    else:
        return "%sZ" % dt.isoformat()

def timestamptostr(ts):
    """Return a RFC3339 date-time string corresponding to the given
    Unix-epoch timestamp."""
    return datetimetostr(utcfromtimestamp(ts))

def strtotimestamp(s):
    """Return the Unix-epoch timestamp corresponding to the given RFC3339
    date-time string."""
    return utctotimestamp(parse_datetime(s))