This is the HTTP proxy service program I originally wrote for a hotel advertising delivery system: it splices advertisements into the HTML DOM structure of pages passing through it. Usually a DIV is what gets inserted, and the inserted content, including any script, can be fetched dynamically from a database. With a few small changes it works as a plain HTTP forwarding proxy: set the proxy machine's IP as the HTTP proxy in the browser, and as long as that machine can reach the Internet, the client machines can browse through it (the part that handles gzip-compressed pages is a bit fiddly).
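The proxy reads its settings from a proxy.conf file in the plain key=value format handled by the SimpleConfig class below. A minimal sketch of such a file, assuming only the two keys the code actually reads (httpport and allowed_clients) and purely illustrative values:

# proxy.conf -- sample configuration (values are illustrative)
# listening port used when none is given on the command line
httpport = 8000
# comma-separated hosts allowed to use the proxy; omit the key to accept everyone
allowed_clients = 192.168.1.10, 192.168.1.11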
# -*- coding:utf-8 -*-
# HTTP proxy server
# 1. IP restriction, MAC restriction
#
# socketref@hotmail.com
# www.sw2us.com

"exec" "python" "-O" "$0" "$@"

__doc__ = """sw2us HTTP Proxy.

"""

__version__ = "0.2.1"

import BaseHTTPServer, select, socket, SocketServer, urlparse
import httplib, traceback, re
import os, sys, mimetools, zlib, StringIO, gzip, time


class ConfigProperty:
    def __init__(self, owner):
        self.key = ''
        self.value = ''

    def create(self, text):
        # text - key=value
        # @return: boolean
        pos = text.find('#')
        if pos != -1:
            text = text[:pos]
        pair = text.split('=')
        if len(pair) != 2:
            #print "Property Line Invalid:%s" % (text)
            return False
        k = pair[0].strip()
        v = pair[1].strip()
        self.key = k
        self.value = v

        return True

    def toString(self):
        s = ''
        try:
            s = "%s=%s" % (self.key, self.value)
        except:
            return ''
        return s

    def toInt(self):
        r = 0
        try:
            r = int(self.value)
        except:
            r = 0
        return r

    def toFloat(self):
        r = 0.0
        try:
            r = float(self.value)
        except:
            r = 0.0
        return r


#@def SimpleConfig
# Simple configuration file, basic format: key=value
class SimpleConfig:
    def __init__(self):
        self._file = ''
        self._props = []
        self._strip = True

    def open(self, file, strip=True):
        # Open the configuration file
        # @param strip - whether to trim invisible characters from both ends of each line
        try:
            self._strip = strip
            self._props = []
            fh = open(file, 'r')
            lines = fh.readlines()
            for text in lines:
                prop = ConfigProperty(self)
                if prop.create(text) == False:
                    prop = None
                else:
                    self._props.append(prop)
            fh.close()
        except:
            return False
        return True

    def toString(self):
        s = ''
        for p in self._props:
            s = s + p.toString() + "\n"
        return s

    def saveAs(self, file):
        # Save the configuration to a file
        try:
            fh = open(file, 'w')
            fh.write(self.toString())
            fh.close()
        except:
            print "write file failed!"
            return False
        return True

    def getProperty(self, name):
        # Get a property object by name
        prop = None
        try:
            for p in self._props:
                if p.key == name:
                    prop = p
                    break
        except:
            pass

        return prop

    def getPropertyValue(self, key, default=''):
        prop = self.getProperty(key)
        if not prop:
            return default
        return prop.value

    def getPropertyValueAsInt(self, name, default=0):
        prop = self.getPropertyValue(name)
        if not prop:
            return default
        r = default
        try:
            r = int(prop)
        except: pass
        return r

    def getPropertyValueAsFloat(self, name, default=0.0):
        prop = self.getPropertyValue(name)
        if not prop:
            return default
        r = default
        try:
            r = float(prop)
        except: pass
        return r


#===========================================#


#===========================================#

def getMacList():
    # parse "arp -a" output into (ip, mac) tuples
    maclist = []
    f = os.popen('arp -a', 'r')
    while True:
        line = f.readline()
        if not line:
            break
        line = line.strip()
        rst = re.match('^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+([0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}).*', line)
        #rst = re.match('^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', line)
        if rst:
            #print rst.groups()
            maclist.append(rst.groups())
    #print maclist
    return maclist



##########################################
confile = SimpleConfig()
confile.open('proxy.conf')
dbconn = None

##########################################
# Initialize system configuration
def initConfiguration():
    r = True

    return r

##########################################

class ProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    __base = BaseHTTPServer.BaseHTTPRequestHandler
    __base_handle = __base.handle
    server_version = "TinyHTTPProxy/" + __version__
    rbufsize = 0                        # keep self.rfile unbuffered


    #######################################################

    # handle() runs in its own thread
    def handle(self):                   # entry point; each thread arrives here with its client socket
        print 'client incoming'
        #self.__base_handle()
        #return
        (ip, port) = self.client_address
        if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
            self.raw_requestline = self.rfile.readline()
            if self.parse_request():
                self.send_error(403)
        else:
            self.__base_handle()

    def _connect_to(self, netloc, soc):
        i = netloc.find(':')
        if i >= 0:
            host_port = netloc[:i], int(netloc[i+1:])
        else:
            host_port = netloc, 80
        #print "\t" "connect to %s:%d" % host_port
        try:
            soc.connect(host_port)
        except socket.error, arg:
            try:
                msg = arg[1]
            except:
                msg = arg
            self.send_error(404, msg)
            return 0
        return 1

    def do_CONNECT(self):
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(self.path, soc):
                self.log_request(200)
                self.wfile.write(self.protocol_version +
                                 " 200 Connection established\r\n")
                self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
                self.wfile.write("\r\n")
                self._read_write(soc, 300)
        finally:
            print "\t" "bye"
            soc.close()
            self.connection.close()


    def do_GET(self):
        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
            self.path, 'http')
        if not netloc:
            netloc = self.headers.get('Host', "")
        #print ">>requester:", self.connection.getpeername(), "path:", self.path
        #print '>>2. ', (scm, netloc, path, params, query, fragment)
        #print 'next host:', netloc
        if scm != 'http' or fragment or not netloc:
            self.send_error(400, "bad url %s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                soc.send("%s %s %s\r\n" % (
                    self.command,
                    urlparse.urlunparse(('', '', path, params, query, '')),
                    self.request_version))
                self.headers['Connection'] = 'close'
                del self.headers['Proxy-Connection']
                for key_val in self.headers.items():
                    soc.send("%s: %s\r\n" % key_val)
                soc.send("\r\n")
                # the request line and headers have now been forwarded
                self._read_write(soc)
        finally:
            print "\t" "bye"
            soc.close()
            self.connection.close()



    def insertTags(self, tag, body, insert):
        # insert a snippet right after the opening <tag ...> in body
        p1 = body.find('<%s' % tag)
        if p1 != -1:
            p2 = body.find('>', p1)
            if p2 != -1:
                part1 = body[:p2+1]
                part2 = body[p2+1:]
                print '*-' * 20
                body = part1 + insert + part2
        return body

    # Responses from pages such as Google come back gzip-compressed, so the text cannot be
    # filtered directly; it has to be decompressed first.
    # After inserting content, Content-Length must be recalculated before replying to the browser.
    # Many responses turn out to be compressed (content-encoding: gzip).

    # Also handles 'transfer-encoding: chunked' responses.
    # gzip data arrives in two forms: either the compressed body follows the headers directly,
    # or it is delivered in chunked blocks.
    # Here the gzip data is fully decompressed and the original content is sent to the client.
    def sendBackResponse(self, command, headers, body):

        insert = '<h1>This is Test </h1>'
        if headers.has_key('content-encoding') and headers['content-encoding'].strip().lower() == 'gzip':
            try:
                del headers['content-encoding']
                gzipdata = ''
                if headers.has_key('transfer-encoding') and headers['transfer-encoding'] == 'chunked':
                    del headers['transfer-encoding']

                    # de-chunk: each chunk is "<hex size>\r\n<data>\r\n"; a zero-size chunk ends the body
                    pos = 0
                    while pos < len(body):
                        p = body.find('\x0d\x0a', pos)
                        chunksize = int(body[pos:p], 16)
                        #print 'chunk size:', body[pos:p]
                        p += 2
                        gzipdata += body[p:p+chunksize]
                        pos = p + chunksize + 2
                        if chunksize == 0:
                            break
                    body = gzipdata

                # decompress the gzip stream
                #ss = zlib.decompress(gzipdata)
                compressedstream = StringIO.StringIO(body)
                gzipper = gzip.GzipFile(fileobj=compressedstream)
                body = gzipper.read()
                #f = open('body%s.txt' % time.time(), 'wb')
                #f.write(body)
                #f.close()

                #body = gzipdata
            except:
                traceback.print_exc()
                print 'decompress failed!'

        #pos = body.find('\x0d\x0a')
        #pos = body.find('\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff')
        #if pos != -1:
        #    body = body[pos+9:]
        #
        #compressedstream = StringIO.StringIO(body)
        #gzipper = gzip.GzipFile(fileobj=compressedstream)
        #body = gzipper.read()

        #body = zlib.decompressobj().decompress('x\x9c' + body)

        #m = re.search('(<body.*>)', body, re.I)
        #if m:
        #    pos = m.start(0)
        #    part1 = body[:pos+len(m.group(0))]
        #    part2 = body[pos+len(m.group(0)):]
        #    body = part1 + insert + part2
        #    print '-*'*20, insert, '-*'*20

        #self.insertTags('body', body, insert)

        css = """ <style>
        #kk{
            border:1px dotted red;
            width:200px;
            height:300px;
            float:left;
            background:#00ff00;
        }
        </style>
        """
        #body = self.insertTags('head', body, css)

        #body = self.insertTags('body', body, insert)
        div = """
        <div id="kk">
            This is Test DIV Block!!
        </div>
        """

        # read external html tags
        try:
            #ff = open('head.tag', 'r')
            #div = ff.read()
            #ff.close()
            #body = self.insertTags('head', body, div)
            body = self.publish_advertisement(body)     # insert the configured advertisement (publish_advertisement is not part of this listing)
        except:
            pass

        #p1 = body.find('<body')
        #if p1 != -1:
        #    p2 = body.find('>', p1)
        #    if p2 != -1:
        #        part1 = body[:p2+1]
        #        part2 = body[p2+1:]
        #        print '*-'*20
        #        body = part1 + insert + part2
        #print m.group(0)

        # the body may have been modified, so recalculate Content-Length
        headers['Content-Length'] = str(len(body))

        #if headers.has_key('content-length'):

        self.connection.send(command)
        self.connection.send('\r\n')
        for k, v in headers.items():
            self.connection.send("%s: %s\r\n" % (k, v))
        self.connection.send("\r\n")
        self.connection.sendall(body)



    #----------------------------------------------------

    def _read_write(self, soc, max_idling=20):
        #getMacList()
        iw = [self.connection, soc]     # self.connection - LAN client connection, soc - outbound connection
        ow = []
        count = 0
        #respfile = soc.makefile('rb', 1024)
        httpCommand = ''
        httpBody = ''
        httpHeaders = {}
        isOkPageResponse = False
        nextReadBytes = 0
        datacnt = 0
        NoContentLength = False
        #print self.connection.getpeername()
        while 1:
            count += 1
            datacnt += 1
            (ins, _, exs) = select.select(iw, ow, iw, 3)
            if exs:
                print 'error occurred!'
                break                   # an exception on one of the sockets
            if ins:
                for i in ins:
                    if i is soc:
                        out = self.connection
                    else:
                        out = soc

                    data = i.recv(8192)
                    if data:
                        out.send(data)
                        count = 0
            else:
                if not isOkPageResponse:
                    return
                else:
                    pass                #print "\t" "idle", count
            if count == max_idling:
                print 'idling exit'
                break                   # quit if no data flows either way within max_idling * 3 secs (20 * 3 = 60 by default)


    do_HEAD = do_GET
    do_POST = do_GET
    do_PUT = do_GET
    do_DELETE = do_GET


class ThreadingHTTPServer(SocketServer.ThreadingMixIn,
                          BaseHTTPServer.HTTPServer):
    pass


def serving(HandlerClass, ServerClass, protocol="HTTP/1.0"):

    if len(sys.argv) < 2 or sys.argv[1] != 'www.sw2us.com':
        sys.exit()

    if sys.argv[2:]:
        port = int(sys.argv[2])
    else:
        port = confile.getPropertyValueAsInt('httpport', 8000)

    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    print "www.sw2us.com@2010 v.1.0.0"
    print "Serving HTTP on", sa[0], "port", sa[1], ""
    sys.stdout = buff                   # redirect further output to the in-memory buffer created in __main__
    sys.stderr = buff

    httpd.serve_forever()



if __name__ == '__main__':
    #getMacList()

    # record the process id
    f = open('proxy.pid', 'w')
    f.write(str(os.getpid()))
    f.close()

    #ProxyHandler.allowed_clients = []
    try:
        # resolve the allowed client list from proxy.conf; if the key is absent, all clients are accepted
        allowed = []
        ss = confile.getPropertyValue('allowed_clients').strip()
        hosts = ss.split(',')
        for h in hosts:
            if h:
                client = socket.gethostbyname(h.strip())
                allowed.append(client)
        if len(allowed):
            ProxyHandler.allowed_clients = allowed

        buff = StringIO.StringIO()
        serving(ProxyHandler, ThreadingHTTPServer)
    except:
        pass
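To start the proxy (a minimal sketch; the file name proxy.py is an assumption, the first argument is the hard-coded launch token checked in serving(), and the optional second argument overrides the httpport value from proxy.conf):

python proxy.py www.sw2us.com 8080

After the startup banner is printed, stdout and stderr are redirected into an in-memory StringIO buffer, so the console goes quiet; point the browser's HTTP proxy setting at this machine's IP and the chosen port to browse through it.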