#!/usr/bin/python -S """ Retrieve various daily cartoon strips. Currently works with Calvin and Hobbes, Non Sequitur, Userfriendly, Badtech, Dilbert and a Shakespearean insult. Its rather easy to add more. This code is loosely based on code from by Prahlad Vaidyanathan : http://www.symonds.net/~prahladv/files/cartoons.py who in turn 'stole it from somewhere on Useless python' and customized it: http://www.lowerstandard.com/python Prabhu Ramachandran """ import sys, string, os, re, time # Let this point to your archive directory. This is where the # cartoons will be saved. This directory will also contain a log # file (cartoon.log). ARCHIVE_DIR='/skratch/prabhu/tmp/cartoons' # If you are behind a proxy then you can either set the enviroment # variable 'http_proxy' in your shell or uncomment the following line # after editing it suitably. #os.environ['http_proxy'] = 'http://www.someproxy.com:3128' import MailMsg, mimetypes import urllib, imghdr def open_file(file_name, mode): """Creates the base directory if it does not exist and then returns the file with the requested mode.""" basedir = os.path.dirname(os.path.abspath(file_name)) if not os.path.isdir(basedir): os.mkdir(basedir) return open(file_name, mode) class GetCartoon: archive_dir = ARCHIVE_DIR #log_file = sys.stdout log_file = open_file(os.path.join(archive_dir, 'cartoon.log'), 'a') def __init__(self): if not os.path.isdir(self.archive_dir): os.mkdir(self.archive_dir) def log(self, msg): print >> self.log_file, msg self.log_file.flush() def retrieve(self, url, filename): try : self.log('Retrieving %s -> %s' % (url, filename)) urllib.urlretrieve(url,filename) self.log('Done.') except IOError, txt : self.log( 'Connect error : %s'%txt) sys.exit(1) def filename(self, name, ext): name = os.path.join(self.archive_dir, name) format = '%s-%s-%s-%s.%s' today = time.localtime(time.time()) newfile = format % (name,today[0],today[1],today[2],ext) return newfile def cleanOldCartoon(self, name, ext): name = os.path.join(self.archive_dir, name) format = '%s-%s-%s-%s.%s' today = time.localtime(time.time()) oldfile = format % (name,today[0],today[1],(today[2]-1),ext) if os.path.exists(oldfile) : os.remove(oldfile) def get(self): """Do cartoon specific stuff here and return the filename of the cartoon.""" return None class BadTech(GetCartoon): def get(self): name = self.filename('badtech','jpg') if os.path.exists(name) : self.log('Already retrieved badtech cartoon today.') else : yday = time.localtime(time.time() - 24*3600) url = 'http://www.badtech.com/a/%s/%s/%s.jpg' % \ (`yday[0]`[-1:],yday[1],yday[2]) self.retrieve(url,name) if imghdr.what(name) != 'jpeg': os.remove(name) raise AssertionError, \ "Unable to get %s. Try again later."%url # self.cleanOldCartoon('badtech', 'jpg') return name class UserFriendly(GetCartoon): def get(self): name = self.filename('userfriendly','gif') if os.path.exists(name) : self.log('Already retrieved UserFriendly cartoon today.') else: url = 'http://www.userfriendly.org' try: page = urllib.urlopen(url) except IOError, txt: self.log('Connect error: %s'%txt) sys.exit(1) patn = re.compile(r'Latest Strip[^>]+?src="(http[^"]+?\.gif)"', re.I) pic = patn.search(page.read()).groups()[0] self.retrieve(pic, name) if imghdr.what(name) != 'gif': os.remove(name) raise AssertionError, \ "Unable to get %s. Try again later."%pic # self.cleanOldCartoon('userfriendly', 'gif') return name class Dilbert(GetCartoon): def get(self): name = self.filename('dilbert', 'gif') if os.path.exists(name) : self.log('Already retrieved Dilbert cartoon today.') else: url = 'http://www.dilbert.com' try: page = urllib.urlopen(url) except IOError, txt: self.log('Connect error: %s'%txt) sys.exit(1) patn = re.compile(r'src="/([^"]+?dilbert\d+\.gif)"', re.I) comic = patn.search(page.read()).groups()[0] pic = urllib.basejoin(url, comic) self.retrieve(pic, name) if imghdr.what(name) != 'gif': os.remove(name) raise AssertionError, \ "Unable to get %s. Try again later."%pic return name class CalvinAndHobbes(GetCartoon): def get(self): name = self.filename('ch', 'gif') if os.path.exists(name) : self.log('Already retrieved Calvin and Hobbes cartoon today.') else: url = 'http://www.ucomics.com/calvinandhobbes/viewch.htm' try: page = urllib.urlopen(url) except IOError, txt: self.log('Connect error: %s'%txt) sys.exit(1) patn = re.compile(r'src="([^"]+?ch\d+\.gif)"', re.I) pic = patn.search(page.read()).groups()[0] self.retrieve(pic, name) if imghdr.what(name) != 'gif': os.remove(name) raise AssertionError, \ "Unable to get %s. Try again later."%pic return name class NonSequitur(GetCartoon): def get(self): name = self.filename('nq', 'gif') if os.path.exists(name) : self.log('Already retrieved Non Sequitur cartoon today.') else: url = 'http://www.ucomics.com/nonsequitur/viewnq.htm' try: page = urllib.urlopen(url) except IOError, txt: self.log('Connect error: %s'%txt) sys.exit(1) patn = re.compile(r'src="([^"]+?nq\d+\.gif)"', re.I) pic = patn.search(page.read()).groups()[0] self.retrieve(pic, name) if imghdr.what(name) != 'gif': os.remove(name) raise AssertionError, \ "Unable to get %s. Try again later."%pic return name class ShakespeareanInsult(GetCartoon): def get(self): name = self.filename('shakespeare','html') if os.path.exists(name): self.log('Already retrieved shakespearean insult today.') else : url = 'http://www.pangloss.com/seidel/Shaker/index.html' page = urllib.urlopen(url) patn = re.compile(r'"the Bard">\s+(

.*?)