#!/usr/bin/env python # -*- coding:Utf-8 -*- # # Crashdump.fr - Adrien Pujol - For testing only... # # thanks: Gawel, Bluetouff.. # # Version .2 / 23 Avril 2009 # work with python 2.4/2.5 and multiprocessing. # install multiprocessing with: #easy_install multiprocessing import sys, socket, httplib, urllib2, time, multiprocessing, random # Vars ### PROCESSES = 20 # Core ############################ DONT TOUCH ##################### # Compteur instances threads global counter counter = 0 # On active le debugging HTTP httplib.HTTPConnection.debuglevel = 1 # Verification du passage des arguments ( url , referer ) if len(sys.argv)<2: print 'Usage: python request.py url ' sys.exit(1) # on attribue au variables url=sys.argv[1] # Verification du format des urls if not url.startswith("http://"): print 'Not a valid url' sys.exit(1) if len(sys.argv)==3: if sys.argv[2].startswith("http://"): referer=sys.argv[2] else: print 'Not a valid referer' sys.exit(1) else: referer="" # TimeOut socket.setdefaulttimeout(6) # Introduction... print "Fetching url: "+url ########## Fonction de connection au site via les proxys ########## def fetchUrl(proxIp): result = None try: # On spécifie les Headers, pour pouvoir accepter tout type de connection. req = urllib2.Request(url, None) req.add_header('User-agent', 'Mozilla/5.5.(X11;.U;.Linux.2.4;.fr-FR;.0.8).Gecko/20010409') req.add_header('Accept','image/gif, image/jpeg, image/png, image/*, text/javascript, text/html, text/plain, text/xml, text/*, application/javascript, application/x-shockwave-flash, application/xml, application/xhtml+xml, application/*, */*') req.add_header('Cookie','X-MV-Referer=; X-Ref-Ok=1') req.add_header('Accept-Encoding','gzip,deflate,compress,identity') req.add_header('Keep-Alive','150') req.add_header('Connection','keep-alive') req.add_header('Pragma','no-cache') req.add_header('Cache-Control','no-cache') req.set_proxy(proxIp, 'http') handle = urllib2.urlopen(req) feeddata = handle.read() except (IOError, urllib2.HTTPError, urllib2.URLError), e: print multiprocessing.current_process().name, str(e), repr(e) return False, proxIp except KeyboardInterrupt, exc: pass else: print multiprocessing.current_process().name, 'fetched', len(feeddata) return True, proxIp ########## Callback ############ def cb(r): global counter print counter, r counter +=1 ########## Main ########## def main(): # Ouverture de la liste: proxies.txt try: file = open('proxies.txt','r') proxylist = file.readlines() except IOError: print 'Error reading proxy list' sys.exit(1) # Create Multiprocess pool print 'Creating pool with %d processes... ' % PROCESSES pool = multiprocessing.Pool(processes=PROCESSES) print # On attaque la liste de proxy. On les ajoutes un a un au pool. proxylist = list(set([p.strip() for p in proxylist if p.strip()])) try: while 1: proxies = random.sample(proxylist, PROCESSES) t = time.time() result = pool.map(fetchUrl, proxies) print [p for s, p in result if s], 'called in', time.time() - t, 's' for status, proxy in result: if not status and proxy in proxylist: proxylist.remove(proxy) print 'removing', proxy pool.close() except KeyboardInterrupt: print 'ctrl+c pressed ... bye.' pool.terminate() pool.join() if __name__ == '__main__': main() # # End #