Link to this snippet: https://friendpaste.com/14WvKAdlntsdIECPOBVMRd

#!/usr/bin/env python
# -*- coding:Utf-8 -*-

#
# Crashdump.fr - Adrien Pujol - For testing only...
#
# thanks: Gawel, Bluetouff..
#

# Version .2 / 23 April 2009

# Works with Python 2.4/2.5 plus the multiprocessing backport.
# Install multiprocessing with: easy_install multiprocessing
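
# proxies.txt is expected to hold one "host:port" proxy per line (an
# assumption based on req.set_proxy(proxIp, 'http') below, which takes
# "host:port" strings), e.g.:
#   192.0.2.10:8080
#   192.0.2.20:3128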

import sys, socket, httplib, urllib2, time, multiprocessing, random

# Vars ###
PROCESSES = 20

# Core ############################ DONT TOUCH #####################

# Counter of callback invocations
counter = 0

# Enable HTTP debugging
httplib.HTTPConnection.debuglevel = 1

# Check that the arguments were passed (url, referer)
if len(sys.argv) < 2:
    print 'Usage: python request.py url <referer_url>'
    sys.exit(1)

# Assign the arguments to variables
url = sys.argv[1]

# Validate the URL formats
if not url.startswith("http://"):
    print 'Not a valid url'
    sys.exit(1)
if len(sys.argv) == 3:
    if sys.argv[2].startswith("http://"):
        referer = sys.argv[2]
    else:
        print 'Not a valid referer'
        sys.exit(1)
else:
    referer = ""

# Timeout for every socket operation
socket.setdefaulttimeout(6)

# Introduction...
print "Fetching url: " + url

########## Connect to the site through a proxy ##########
def fetchUrl(proxIp):
    try:
        # Set the headers so any kind of connection is accepted.
        req = urllib2.Request(url, None)
        req.add_header('User-agent', 'Mozilla/5.5.(X11;.U;.Linux.2.4;.fr-FR;.0.8).Gecko/20010409')
        req.add_header('Accept', 'image/gif, image/jpeg, image/png, image/*, text/javascript, text/html, text/plain, text/xml, text/*, application/javascript, application/x-shockwave-flash, application/xml, application/xhtml+xml, application/*, */*')
        req.add_header('Cookie', 'X-MV-Referer=; X-Ref-Ok=1')
        req.add_header('Accept-Encoding', 'gzip,deflate,compress,identity')
        req.add_header('Keep-Alive', '150')
        req.add_header('Connection', 'keep-alive')
        req.add_header('Pragma', 'no-cache')
        req.add_header('Cache-Control', 'no-cache')
        # Pass the optional referer along with the request.
        if referer:
            req.add_header('Referer', referer)
        req.set_proxy(proxIp, 'http')
        handle = urllib2.urlopen(req)
        feeddata = handle.read()
    except (IOError, urllib2.HTTPError, urllib2.URLError), e:
        print multiprocessing.current_process().name, str(e), repr(e)
        return False, proxIp
    except KeyboardInterrupt:
        # Report failure rather than None so the caller can still unpack the tuple.
        return False, proxIp
    else:
        print multiprocessing.current_process().name, 'fetched', len(feeddata)
        return True, proxIp
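
# A hypothetical call, for illustration: fetchUrl('192.0.2.10:8080')
# returns (True, '192.0.2.10:8080') on success and (False, ...) on any
# network error, so the caller can prune dead proxies from its list.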

########## Callback ############
def cb(r):
    global counter
    print counter, r
    counter += 1
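
# cb() is not wired up by pool.map below; a sketch of how it could be
# used with the asynchronous variant of the same multiprocessing API:
#   pool.map_async(fetchUrl, proxies, callback=cb)
# map_async returns immediately and invokes cb once with the result list.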

########## Main ##########
def main():
    # Open the proxy list: proxies.txt
    try:
        f = open('proxies.txt', 'r')
        proxylist = f.readlines()
        f.close()
    except IOError:
        print 'Error reading proxy list'
        sys.exit(1)

    # Create the multiprocessing pool
    print 'Creating pool with %d processes... ' % PROCESSES
    pool = multiprocessing.Pool(processes=PROCESSES)
    print

    # Work through the proxy list, handing one batch per iteration to the pool.
    proxylist = list(set([p.strip() for p in proxylist if p.strip()]))
    try:
        # random.sample raises ValueError once the list shrinks below
        # PROCESSES, so stop before that happens.
        while len(proxylist) >= PROCESSES:
            proxies = random.sample(proxylist, PROCESSES)
            t = time.time()
            result = pool.map(fetchUrl, proxies)
            print [p for s, p in result if s], 'called in', time.time() - t, 's'
            # Drop dead proxies so they are not sampled again.
            for status, proxy in result:
                if not status and proxy in proxylist:
                    proxylist.remove(proxy)
                    print 'removing', proxy
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print 'ctrl+c pressed ... bye.'
        pool.terminate()
        pool.join()

if __name__ == '__main__':
    main()
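
# Example invocation (hypothetical URLs), matching the usage string above:
#   python request.py http://example.com/ http://example.org/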


#
# End
#