socketref,再见!高德

https://github.com/adoggie

  C++博客 :: 首页 :: 联系 :: 聚合  :: 管理
  246 Posts :: 4 Stories :: 312 Comments :: 0 Trackbacks

常用链接

留言簿(54)

我参与的团队

搜索

  •  

最新评论

阅读排行榜

评论排行榜

这是之前开发酒店广告投放系统时编写的 HTTP 代理服务程序,功能是把广告插入到 HTML 的 DOM 结构中。一般插入的是 DIV,当然也可以是 script,这些内容都可以从数据库中动态获取。
简单修改之后即可当做普通的 HTTP 代理服务器使用:在浏览器中把代理地址设置为运行该程序的机器的 IP 即可。只要运行代理程序的机器能上网,客户机便能上网(其中处理页面 gzip 压缩的部分有点麻烦)。
  1 # -*- coding:utf-8 -*-
  2 # http代理服务器
  3 # 1.ip限制,mac限制
  4 #
  5 # socketref@hotmail.com 
  6 # www.sw2us.com
  7 
  8 "exec" "python" "-O" "$0" "$@"
  9 
 10 __doc__ = """sw2us HTTP Proxy.
 11 
 12 """
 13 
 14 __version__ = "0.2.1"
 15 
 16 import BaseHTTPServer, select, socket, SocketServer, urlparse
 17 import httplib,traceback,re
 18 import os,sys,re,mimetools,zlib,StringIO,gzip,time,StringIO
 19 
 20 
class ConfigProperty:
    """One ``key=value`` entry parsed from a single configuration line.

    ``key`` and ``value`` are plain strings; both start out empty and are
    filled in by :meth:`create`.
    """

    def __init__(self, owner):
        # ``owner`` is accepted for interface compatibility; it is not stored.
        self.key = ''
        self.value = ''

    def create(self, text):
        """Parse one ``key=value`` line into this property.

        Everything from the first ``#`` onwards is treated as a comment and
        discarded.  The remainder is split on the FIRST ``=`` only, so a
        value may itself contain ``=`` (for example a URL with a query
        string).  Key and value are stripped of surrounding whitespace.

        @param text: raw line, e.g. ``"httpport = 8000  # listen port"``
        @return: True when the line contained ``=``, otherwise False (the
                 property is left untouched in that case).
        """
        text = text.split('#', 1)[0]
        key, separator, value = text.partition('=')
        if not separator:
            return False
        self.key = key.strip()
        self.value = value.strip()
        return True

    def toString(self):
        """Return the property as ``key=value``, or ``''`` if formatting fails."""
        try:
            return "%s=%s" % (self.key, self.value)
        except Exception:
            # Best effort: an unformattable key/value yields an empty string.
            return ''

    def toInt(self):
        """Return the value as an int, or 0 when it is not a valid integer."""
        try:
            return int(self.value)
        except (TypeError, ValueError, OverflowError):
            return 0

    def toFloat(self):
        """Return the value as a float, or 0.0 when it is not a valid number."""
        try:
            return float(self.value)
        except (TypeError, ValueError, OverflowError):
            return 0.0
 66     
 67     
 68 #@def SimpleConfig
 69 # 简单配置信息文件,基本格式 : key=value
class SimpleConfig:
    """Minimal configuration file reader/writer; one ``key=value`` per line."""

    def __init__(self):
        self._file = ''
        self._props = []
        self._strip = True

    def open(self, file, strip=True):
        """Load every valid ``key=value`` line of a configuration file.

        Previously loaded properties are discarded first.  Lines that
        ``ConfigProperty.create`` rejects are skipped.

        @param file: path of the configuration file
        @param strip: whether to trim invisible characters at both ends;
                      stored on the instance but not otherwise used by this
                      class
        @return: True when the file was read, False when it could not be
                 opened or read
        """
        self._strip = strip
        self._props = []
        try:
            # ``with`` guarantees the handle is released even if reading fails.
            with open(file, 'r') as fh:
                for text in fh:
                    prop = ConfigProperty(self)
                    if prop.create(text):
                        self._props.append(prop)
        except (IOError, OSError):
            return False
        return True

    def toString(self):
        """Return all properties as text, one ``key=value`` line each."""
        return ''.join(p.toString() + "\n" for p in self._props)

    def saveAs(self, file):
        """Write the current properties to a file.

        @param file: destination path (overwritten)
        @return: True on success, False when the file could not be written
        """
        try:
            with open(file, 'w') as fh:
                fh.write(self.toString())
        except (IOError, OSError):
            print("write File Failed!")
            return False
        return True

    def getProperty(self, name):
        """Return the first property whose key equals ``name``, or None."""
        for p in self._props:
            if p.key == name:
                return p
        return None

    def getPropertyValue(self, key, default=''):
        """Return the string value stored under ``key``, or ``default``."""
        prop = self.getProperty(key)
        if prop is None:
            return default
        return prop.value

    def getPropertyValueAsInt(self, name, default=0):
        """Return the value of ``name`` as an int.

        ``default`` is returned when the key is missing, empty, or not a
        valid integer.
        """
        value = self.getPropertyValue(name)
        if not value:
            return default
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    def getPropertyValueAsFloat(self, name, default=0.0):
        """Return the value of ``name`` as a float.

        ``default`` is returned when the key is missing, empty, or not a
        valid number.
        """
        value = self.getPropertyValue(name)
        if not value:
            return default
        try:
            # Convert the stored value, not the default.
            return float(value)
        except (TypeError, ValueError):
            return default
151     
152 
153 #===========================================#
154 
155     
156 #===========================================#
157 
def getMacList():
    """Return the (ip, mac) pairs reported by the ``arp -a`` command.

    Only output lines that start with a dotted IPv4 address followed by a
    MAC address written with dashes (``xx-xx-xx-xx-xx-xx``) are recognised;
    every other line is ignored.

    @return: list of ``(ip, mac)`` string tuples in output order
    """
    pattern = re.compile(
        r'^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+([0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}).*')
    maclist = []
    pipe = os.popen('arp -a', 'r')
    try:
        for line in pipe:
            found = pattern.match(line.strip())
            if found:
                maclist.append(found.groups())
    finally:
        # Always release the pipe, even if reading its output fails.
        pipe.close()
    return maclist
173 
174 
175         
176 ##########################################
# Module-wide configuration, loaded once at import time from ``proxy.conf``
# in the current working directory.
confile = SimpleConfig()
if not confile.open('proxy.conf'):
    # A missing or unreadable file is not fatal: every lookup on ``confile``
    # then falls back to the default supplied by its caller.
    sys.stderr.write("proxy.conf could not be read; using defaults\n")
dbconn = None
180 
181 ##########################################
182 #初始化系统配置
def initConfiguration():
    """Initialise the system configuration.

    Currently a placeholder with nothing to set up.

    @return: always True
    """
    return True
187 
188 ##########################################
189 
class ProxyHandler (BaseHTTPServer.BaseHTTPRequestHandler):
    """Request handler implementing a simple forwarding HTTP proxy.

    Plain HTTP methods are forwarded to the origin server named in the
    request URL (or the ``Host`` header) and the two sockets are then relayed
    in both directions.  ``CONNECT`` opens a raw tunnel.  When the class
    attribute ``allowed_clients`` exists, clients whose IP is not in it are
    answered with 403.
    """
    __base = BaseHTTPServer.BaseHTTPRequestHandler
    __base_handle = __base.handle
    server_version = "TinyHTTPProxy/" + __version__
    # Keep self.rfile unbuffered: _read_write() reads the raw client socket,
    # so request bytes must not be held back in a file buffer.
    rbufsize = 0

    def handle(self):
        """Entry point for a new client connection (runs in its own thread)."""
        print('client incoming')
        (ip, port) = self.client_address
        if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
            # Still read and parse the request so a well-formed 403 can be sent.
            self.raw_requestline = self.rfile.readline()
            if self.parse_request():
                self.send_error(403)
        else:
            self.__base_handle()

    def _connect_to(self, netloc, soc):
        """Connect ``soc`` to ``netloc`` (``host`` or ``host:port``, default 80).

        On failure an error response is sent to the client.

        @return: 1 when connected, 0 otherwise
        """
        host, colon, port = netloc.partition(':')
        if colon:
            try:
                host_port = host, int(port)
            except ValueError:
                # A non-numeric port is a malformed request, not a crash.
                self.send_error(400, "bad port in %s" % netloc)
                return 0
        else:
            host_port = netloc, 80
        try:
            soc.connect(host_port)
        except socket.error as arg:
            try:
                msg = arg[1]
            except (IndexError, TypeError):
                msg = arg
            self.send_error(404, msg)
            return 0
        return 1

    def do_CONNECT(self):
        """Open a raw tunnel to ``self.path`` and relay bytes both ways."""
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(self.path, soc):
                self.log_request(200)
                self.wfile.write(self.protocol_version +
                                 " 200 Connection established\r\n")
                self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
                self.wfile.write("\r\n")
                self._read_write(soc, 300)
        finally:
            print("\t" "bye")
            soc.close()
            self.connection.close()

    def do_GET(self):
        """Forward the request to the origin server and relay the reply.

        The target host comes from the request URL or, when the URL has no
        host part, from the ``Host`` header.  Non-``http`` schemes, URLs with
        a fragment and requests without any host are rejected with 400.
        """
        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
            self.path, 'http')
        if not netloc:
            netloc = self.headers.get('Host', "")
        if scm != 'http' or fragment or not netloc:
            self.send_error(400, "bad url %s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                self.headers['Connection'] = 'close'
                del self.headers['Proxy-Connection']
                # The origin server gets a relative request target.
                request = ["%s %s %s\r\n" % (
                    self.command,
                    urlparse.urlunparse(('', '', path, params, query, '')),
                    self.request_version)]
                for key_val in self.headers.items():
                    request.append("%s: %s\r\n" % key_val)
                request.append("\r\n")
                # sendall(): a plain send() may transmit only part of the data.
                soc.sendall(''.join(request))
                # Request line and headers are out; relay everything else.
                self._read_write(soc)
        finally:
            print("\t" "bye")
            soc.close()
            self.connection.close()

    def insertTags(self, tag, body, insert):
        """Insert ``insert`` right after the first opening ``<tag ...>``.

        The search is a case-sensitive match on the text ``'<' + tag``
        followed by the next ``'>'``.  ``body`` is returned unchanged when no
        such tag is found.
        """
        start = body.find('<%s' % tag)
        if start == -1:
            return body
        end = body.find('>', start)
        if end == -1:
            return body
        print('*-' * 20)
        return body[:end + 1] + insert + body[end + 1:]

    def _dechunk(self, body):
        """Decode a ``Transfer-Encoding: chunked`` body into the raw payload.

        Raises ValueError when a chunk-size line is missing or not
        hexadecimal.
        """
        pieces = []
        pos = 0
        while pos < len(body):
            eol = body.find('\r\n', pos)
            if eol == -1:
                raise ValueError('malformed chunk header')
            # Anything after ';' on the size line is a chunk extension.
            size = int(body[pos:eol].split(';', 1)[0].strip(), 16)
            if size == 0:
                break
            start = eol + 2
            pieces.append(body[start:start + size])
            pos = start + size + 2      # skip the CRLF that ends the chunk
        return ''.join(pieces)

    def sendBackResponse(self, command, headers, body):
        """Send a (possibly rewritten) response back to the client.

        Some pages arrive gzip-compressed, either directly after the headers
        or wrapped in chunked transfer coding; they cannot be filtered as
        text until decoded.  The body is therefore de-chunked and
        decompressed, handed to ``self.publish_advertisement`` when the
        handler defines it, and sent with a recomputed ``Content-Length``.
        If decoding fails, headers and body are passed through untouched so
        they still describe each other.

        NOTE(review): nothing in the visible code calls this method.

        @param command: status line without the trailing CRLF
        @param headers: mapping of header name to value; the encoding headers
                        are looked up by lower-case name -- confirm this
                        matches the caller's key casing
        @param body: raw response body as received from the origin server
        """
        def header_is(name, expected):
            return (name in headers and
                    headers[name].strip().lower() == expected)

        chunked = header_is('transfer-encoding', 'chunked')
        gzipped = header_is('content-encoding', 'gzip')
        decoded = True
        if chunked or gzipped:
            try:
                plain = body
                if chunked:
                    plain = self._dechunk(plain)
                if gzipped:
                    plain = gzip.GzipFile(
                        fileobj=StringIO.StringIO(plain)).read()
            except Exception:
                traceback.print_exc()
                print('decompress failed!')
                decoded = False
            else:
                # Only drop the encoding headers once decoding has succeeded.
                body = plain
                if chunked:
                    del headers['transfer-encoding']
                if gzipped:
                    del headers['content-encoding']

        if decoded:
            # Optional hook that inserts the configured advertisement markup.
            publish = getattr(self, 'publish_advertisement', None)
            if publish is not None:
                try:
                    body = publish(body)
                except Exception:
                    # Best effort: a failing hook must not lose the page.
                    traceback.print_exc()
            headers['Content-Length'] = str(len(body))

        head = [command, '\r\n']
        for k, v in headers.items():
            head.append("%s: %s\r\n" % (k, v))
        head.append("\r\n")
        self.connection.sendall(''.join(head))
        self.connection.sendall(body)

    def _read_write(self, soc, max_idling=20):
        """Relay data between the client socket and ``soc`` until done.

        Returns when either side closes its connection, when select reports
        an exceptional condition, or after ``max_idling`` consecutive
        3-second polls without any data (20 * 3 = 60 seconds by default).

        @param soc: socket connected to the remote server
        @param max_idling: number of idle polls tolerated before giving up
        """
        # self.connection is the client side, soc the outbound side.
        watched = [self.connection, soc]
        count = 0
        while 1:
            count += 1
            (ins, _, exs) = select.select(watched, [], watched, 3)
            if exs:
                print('error occr!')
                break
            for source in ins:
                if source is soc:
                    out = self.connection
                else:
                    out = soc
                data = source.recv(8192)
                if not data:
                    return              # peer closed its side
                # sendall(): a plain send() may silently drop the tail.
                out.sendall(data)
                count = 0
            if count == max_idling:
                print('idling exit')
                break

    # Every other supported method is forwarded exactly like GET.
    do_HEAD = do_GET
    do_POST = do_GET
    do_PUT  = do_GET
    do_DELETE=do_GET
459 
class ThreadingHTTPServer(SocketServer.ThreadingMixIn,
                          BaseHTTPServer.HTTPServer):
    """HTTPServer combined with ThreadingMixIn; adds no behaviour of its own."""
462 
463 
464 
465 
def serving(HandlerClass,
        ServerClass, protocol="HTTP/1.0"):
    """Create the proxy server and serve requests until it stops.

    The process exits silently unless the first command-line argument is
    ``www.sw2us.com``.  The listening port is the second command-line
    argument when given, otherwise the ``httpport`` entry of the module
    configuration (default 8000).

    After the start-up banner is printed, stdout and stderr are redirected to
    the module-level ``buff`` object when one has been defined.

    @param HandlerClass: request handler class; its ``protocol_version`` is
                         set to ``protocol``
    @param ServerClass: server class, called as
                        ``ServerClass((host, port), HandlerClass)``
    @param protocol: HTTP protocol version string for the handler
    """
    if len(sys.argv) < 2 or sys.argv[1] != 'www.sw2us.com':
        sys.exit()

    if sys.argv[2:]:
        port = int(sys.argv[2])
    else:
        port = confile.getPropertyValueAsInt('httpport', 8000)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)
    try:
        sa = httpd.socket.getsockname()
        print("www.sw2us.com@2010 v.1.0.0")
        print("Serving HTTP on %s port %s " % (sa[0], sa[1]))
        # ``buff`` only exists when the file runs as a script; skip the
        # redirection instead of failing when it was never defined.
        sink = globals().get('buff')
        if sink is not None:
            sys.stdout = sink
            sys.stderr = sink
        httpd.serve_forever()
    finally:
        # Release the listening socket however the serve loop ends.
        httpd.server_close()
492         
493         
494         
if __name__ == '__main__':
    # Write our process id to proxy.pid in the current directory.
    with open('proxy.pid', 'w') as f:
        f.write(str(os.getpid()))

    try:
        # ``allowed_clients`` is a comma-separated list of host names or IPs;
        # each entry is resolved to an IP address once, at start-up.
        allowed = []
        ss = confile.getPropertyValue('allowed_clients').strip()
        for h in ss.split(','):
            if h:
                allowed.append(socket.gethostbyname(h.strip()))
        if allowed:
            # Without this attribute the handler accepts every client.
            ProxyHandler.allowed_clients = allowed
        # serving() redirects stdout/stderr into this in-memory buffer.
        buff = StringIO.StringIO()

        serving(ProxyHandler, ThreadingHTTPServer)
    except Exception:
        # Report start-up/runtime failures on the real stderr instead of
        # silently discarding them (sys.stderr may already be redirected).
        traceback.print_exc(file=sys.__stderr__)
519         
520 


posted on 2010-09-30 00:16 放屁阿狗 阅读(7677) 评论(2)  编辑 收藏 引用 所属分类: perl/python/php/lua/tcl

Feedback

# re: Python开发Http代理服务器 2012-02-27 16:20 nupter
sendBackResponse是什么函数?貌似没有调用啊?  回复  更多评论
  

# re: Python开发Http代理服务器 2014-01-05 16:28 longy
弄上缩进有问题 能发份源码我么?  回复  更多评论
  


只有注册用户登录后才能发表评论。
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理