好了,直接贴代码。
首先是名为 ie.py 的模块代码:
# -*- coding:utf-8 -*-
import time
from win32com.client import DispatchEx
def ExistIE(url):
    """Return an already-open shell window whose LocationURL equals ``url``.

    The ShellWindows collection is instantiated by its CLSID and scanned in
    order; the first window whose ``LocationURL`` is exactly ``url`` is
    returned.  Returns ``None`` when the collection is empty or nothing
    matches.
    """
    shell_windows = DispatchEx('{9BA05972-F6A8-11CF-A442-00A0C90A8F39}')
    if not len(shell_windows):
        return None
    for window in shell_windows:
        if window.LocationURL == url:
            return window
    return None
def NewIE(url):
    """Create a new "InternetExplorer.Application" object and navigate to ``url``.

    Returns the browser object immediately after ``Navigate`` is called;
    this function does not wait for the page to load.
    """
    browser = DispatchEx("InternetExplorer.Application")
    browser.Navigate(url)
    return browser
def openIE(url):
    """Return a browser showing ``url``, reusing an open window when possible.

    ``ExistIE`` is consulted first; only when it finds no window whose
    location equals ``url`` is a fresh one created with ``NewIE``.

    >>> import ie
    >>> myie = ie.openIE("about:blank")
    """
    ie = ExistIE(url)
    # Identity test: "no window found" is signalled by the None singleton.
    if ie is None:
        ie = NewIE(url)
    return ie
def WaitIE(ie, interval=1):
    """Block until ``ie`` has finished loading its current page.

    Polls the browser every ``interval`` seconds (default 1, as before)
    until it is neither ``Busy`` nor in a ``ReadyState`` other than 4
    (READYSTATE_COMPLETE).  Checking ``Busy`` alone is racy: right after
    ``Navigate`` the flag may not be set yet, so the wait could return
    before the document exists.

    ie       -- browser object exposing ``Busy`` and ``ReadyState``.
    interval -- seconds to sleep between polls.

    Returns None.  There is no timeout; a page that never completes keeps
    this function waiting.
    """
    ready_state_complete = 4
    while ie.Busy or ie.ReadyState != ready_state_complete:
        time.sleep(interval)
def Visible(ie):
    """Toggle the visibility of the browser window ``ie``.

    A truthy ``ie.Visible`` becomes 0 and a falsy one becomes 1.  The toggle
    is based on truthiness rather than ``1 - ie.Visible`` so that a truthy
    value other than 1 (for example -1, should the property ever be
    reported that way) is still switched off instead of staying truthy.
    """
    ie.Visible = 0 if ie.Visible else 1
def GetBody(ie):
    """Return the ``body`` element of the document loaded in ``ie``.

    ``WaitIE`` is called first so the document is only read after the
    browser has finished its wait.
    """
    WaitIE(ie)
    document = ie.Document
    return document.body
def GetNodes(parentNode, tag):
    """Return a list of the ``tag`` elements found under ``parentNode``.

    The result of ``parentNode.getElementsByTagName(tag)`` is copied into a
    plain Python list, so callers can index it and iterate it repeatedly.

    >>> coldiv=GetNodes(body,"div")
    """
    return [childNode for childNode in parentNode.getElementsByTagName(tag)]
def NodeByAttr(Nodes, nodeattr, nodeval):
    """Return the first node in ``Nodes`` whose ``nodeattr`` equals ``nodeval``.

    Each node's ``getAttribute(nodeattr)`` result is compared to ``nodeval``
    directly and, failing that, through its ``str()`` form, so non-string
    attribute values (e.g. the number 3 against "3") still match.

    A node whose attribute is missing (``None``) never matches, and a value
    that cannot be converted with ``str()`` (non-ASCII unicode on Python 2)
    is skipped instead of aborting the search.

    Returns ``None`` when no node matches.

    >>> div_id_editor=NodeByAttr(coldiv,"id","editor_ifr")
    """
    for node in Nodes:
        value = node.getAttribute(nodeattr)
        if value is None:
            # Missing attribute: str(None) == "None" must not count as a hit.
            continue
        if value == nodeval:
            return node
        try:
            if str(value) == nodeval:
                return node
        except UnicodeError:
            # Value has no plain-str form here; it already failed the
            # direct comparison above, so it is not a match.
            continue
    return None
def SetNode(node, val):
    """Replace the ``innerHTML`` of ``node`` with ``val``."""
    setattr(node, "innerHTML", val)
if __name__ == "__main__":
    # Self-demo: open a blank page, show the window, build a few nodes and
    # read them back through the helpers defined above.
    demo_ie = NewIE("about:blank")
    Visible(demo_ie)
    demo_body = GetBody(demo_ie)

    # Three sibling <div> elements directly under <body>.
    SetNode(demo_body, "<div>Genius</div>" * 3)
    divs = GetNodes(demo_body, "div")

    # Fill the first <div> with ten paragraphs and tag it with an id.
    first_div = divs[0]
    SetNode(first_div, "<p>Hello</p>" * 10)
    first_div.SetAttribute("id", "test")

    # Look the <div> up again by id and print each paragraph's markup.
    tagged_div = NodeByAttr(divs, "id", "test")
    for paragraph in GetNodes(tagged_div, "p"):
        print(paragraph.innerHTML)
然后就可以在另一个脚本里导入并调用它,实现向编辑器插入 HTML 的功能:
#-*- coding:utf-8 -*-
import ie
# Drive the blog "create post" page: fill in the title, choose a category
# and append HTML to the rich-text editor.
page_url = "http://hi.baidu.com/mirguest/creat/blog/"
browser = ie.openIE(page_url)
#ie.Visible(browser)
page_body = ie.GetBody(browser)

# Header: only fill in the title when the box is still empty.
title_input = ie.NodeByAttr(ie.GetNodes(page_body, "input"), "id", "spBlogTitle")
if not len(title_input.value):
    title_input.value = "Test_Generated_By_Python"

# Category: select by option index; note that the index starts at 0.
category_select = ie.NodeByAttr(ie.GetNodes(page_body, "select"), "id", "spBlogCatName")
category_select.selectedIndex = 2

# Editor: the editable document lives inside an <iframe>.
editor_iframe = ie.NodeByAttr(
    ie.GetNodes(page_body, "iframe"), "id", "tangram_editor_iframe_TANGRAM__1")
editor_iframe.click()
editor_body = editor_iframe.contentWindow.Document.body

# Read HTML source
# The content could instead be read from an HTML source file here.
#f=open(r"..\myhtml.html","r")
#content=f.read().decode("utf-8")
#f.close()

# Set HTML source: keep what is already in the editor and append to it.
new_content = editor_body.innerHTML + "<div class='line'></div>"
ie.SetNode(editor_body, new_content)

# Submit (left disabled so that running the script does not publish).
#div_ids=ie.GetNodes(page_body,"div")
#div_id_btnbox=ie.NodeByAttr(div_ids,"id","btn-box")
#submitbtn=div_id_btnbox.childNodes(0)
#submitbtn.click()
大家可以试试,其实很简单:先获取 InternetExplorer 对象,然后对其中的文档(DOM)进行处理。
关于 HTML DOM,可参考 http://www.w3school.com.cn/htmldom/index.asp 。
另外,关于 IE 的 InternetExplorer 对象,可参考 http://msdn.microsoft.com/en-us/library/aa752084(VS.85).aspx 。
阅读全文
类别:Python | 查看评论 | 文章来源:
http://hi.baidu.com/mirguest/blog/item/b42886f997fe1f77034f5634.html