"""Build local lists of Drupal theme/module project names.

Walks the paginated project listings on drupal.org and writes one project
name per line to ``drupal-themes.lst`` or ``drupal-modules.lst`` in the
current working directory.  This is a Python 2 script (BeautifulSoup 3,
``raw_input``, ``sys.setdefaultencoding``).
"""
import requests
import socket
import sys
from BeautifulSoup import BeautifulSoup

# Python 2 hack: make implicit unicode -> str conversion use UTF-8 so that
# writing scraped (unicode) project names to a byte-mode file does not fail
# on non-ASCII characters.
reload(sys)
sys.setdefaultencoding("utf-8")
socket.setdefaulttimeout(10000)


def update(resource):
    """Scrape every listing page for *resource* and save the project names.

    resource -- "themes" selects the theme listing and ``drupal-themes.lst``;
                any other value selects the module listing and
                ``drupal-modules.lst``.

    Pages are requested starting at page 0 until a page yields no project
    links or the server answers with a status other than 200.  The output
    file is truncated on entry and closed on exit, even if a request fails.
    """
    if resource == "themes":
        out_name = "drupal-themes.lst"
        burl = "http://www.drupal.org/project/project_theme?page=%d"
    else:
        out_name = "drupal-modules.lst"
        burl = "http://www.drupal.org/project/project_module?page=%d"

    # The context manager guarantees the list is flushed and closed.
    with open(out_name, 'w') as f:
        i = 0
        while True:
            count = 0
            url = burl % (i)
            r = requests.get(url)
            if r.status_code != 200:
                # Do not end the crawl silently on an HTTP error.
                print("Stopping: HTTP %d for %s" % (r.status_code, url))
                break

            print("Currently on page %d of %s" % (i + 1, resource))
            soup = BeautifulSoup(r.text)
            for link in soup.findAll('a'):
                # Only anchors directly inside an <h2> that carry an href
                # and no rel attribute are considered.
                if link.parent.name != 'h2':
                    continue
                if not link.has_key('href') or link.has_key('rel'):
                    continue
                # Expected shape is "/project/<name>"; the length check
                # skips hrefs with fewer segments instead of raising
                # IndexError.
                parts = link["href"].split("/")
                if len(parts) > 2 and parts[1] == "project":
                    count = count + 1
                    f.write(parts[2])
                    f.write("\n")

            if count == 0:
                # A page without project links marks the end of the listing.
                break
            if count != 25:
                # Presumably a full listing page holds 25 projects; anything
                # else is reported so the run can be checked by hand.
                print("Count not 25 at %d" % (i))
            i = i + 1


# Interactive entry point: keep prompting until a valid choice is made.
while True:
    resource = raw_input("Press 1 for themes, 2 for modules, 3 for both:\n")
    # Only the integer parse is guarded, so failures inside update() are no
    # longer swallowed and misreported as bad input.
    try:
        choice = int(resource)
    except ValueError:
        print("Enter one of 1,2,3")
        continue

    if choice == 1:
        update("themes")
        break
    elif choice == 2:
        update("modules")
        break
    elif choice == 3:
        update("themes")
        update("modules")
        break
    else:
        print("Invalid Input")