""" A bunch of utlility functions that let me download stuff off the net. Especially useful when I need to download a whole bunch of files off a web page. I find this way more convenient and flexible than using wget etc.""" import urllib, re def get_page(url): return urllib.urlopen(url) def get_file(url, file): urllib.urlretrieve(url, file) def read_page(url): page = get_page(url) data = page.read() return data def get_all(data, base=None, extn="mp3"): patn = re.compile(r'HREF="([^"]+?\.%s)"'%extn, re.I) tmp = patn.findall(data) if base: files = [] for file in tmp: files.append(urllib.basejoin(base, file)) return files else: return tmp def print_all(data, base=None, extn="mp3"): files = get_all(data, base, extn) for file in files: print file if __name__ == "__main__": data = read_page('http://www.wakeofthemoon.com/') print_all(data, base='http://www.wakeofthemoon.com/', extn='mp3')