随笔 - 41, 文章 - 8, 评论 - 8, 引用 - 0
数据加载中……

[导入][python]在百度空间中插入HTML——python实现

好了,直接贴代码。
首先是名为 ie.py 的模块代码(依赖 pywin32 提供的 win32com):
 

# -*- coding:utf-8 -*-
import time
from win32com.client import DispatchEx

def ExistIE(url):
    """Look for an already-open shell window located at *url*.

    The ShellWindows COM collection is enumerated and each window's
    ``LocationURL`` is compared with *url* (exact equality). The first
    matching window object is returned; ``None`` is returned when the
    collection is empty or nothing matches.
    """
    # CLSID handed to DispatchEx to obtain the ShellWindows collection.
    shell_windows_clsid = '{9BA05972-F6A8-11CF-A442-00A0C90A8F39}'
    windows = DispatchEx(shell_windows_clsid)
    if len(windows) != 0:
        for window in windows:
            if window.LocationURL == url:
                return window
    return None

def NewIE(url):
    """Create a new Internet Explorer COM instance and navigate it to *url*.

    Returns the ``InternetExplorer.Application`` object immediately after
    ``Navigate`` has been called; this function does not wait for the page
    to finish loading.
    """
    browser = DispatchEx("InternetExplorer.Application")
    browser.Navigate(url)
    return browser

def openIE(url):
    """Return a browser window for *url*, reusing an open one when possible.

    ``ExistIE`` is asked first for a window already located at *url*; only
    when it finds none is a new instance started with ``NewIE``.

    >>> myie = ie.openIE("about:blank")
    """
    ie = ExistIE(url)
    # Identity test: avoids routing the comparison through the COM wrapper.
    if ie is None:
        ie = NewIE(url)
    return ie

def WaitIE(ie, interval=1):
    """Block until ``ie.Busy`` becomes false.

    ``ie.Busy`` is polled repeatedly, sleeping *interval* seconds between
    polls. *interval* defaults to 1, the value that used to be hard-coded,
    so existing callers behave exactly as before.

    There is no timeout: the call returns (with ``None``) only once
    ``Busy`` reads as false.
    """
    while ie.Busy:
        time.sleep(interval)

def Visible(ie):
    """Toggle the ``Visible`` property of *ie*.

    The new value is ``1 - ie.Visible``, so a value of 1 becomes 0 and a
    value of 0 becomes 1.
    """
    current = ie.Visible
    ie.Visible = 1 - current

def GetBody(ie):
    """Return the ``body`` element of the document loaded in *ie*.

    ``WaitIE`` is called first so the browser is no longer busy before its
    ``Document`` is accessed.
    """
    WaitIE(ie)
    document = ie.Document
    return document.body

def GetNodes(parentNode,tag):
    """Collect the elements named *tag* found under *parentNode* into a list.

    >>> coldiv=GetNodes(body,"div")

    The result of ``parentNode.getElementsByTagName(tag)`` is iterated once
    and its items are returned, in iteration order, as a new Python list.
    """
    return [element for element in parentNode.getElementsByTagName(tag)]

def NodeByAttr(Nodes,nodeattr,nodeval):
    """Return the first node in *Nodes* whose attribute matches *nodeval*.

    >>> div_id_editor=NodeByAttr(coldiv,"id","editor_ifr")

    For each node, ``node.getAttribute(nodeattr)`` is compared with
    *nodeval* directly and, failing that, through its ``str()`` form, so a
    non-string attribute value (e.g. the number 5) still matches its
    textual spelling (``"5"``).

    Returns ``None`` when no node matches, including when *Nodes* is empty.
    """
    for node in Nodes:
        value = node.getAttribute(nodeattr)
        # Direct comparison first: it needs no conversion, so it also works
        # for unicode values that str() cannot encode on Python 2.
        if value == nodeval:
            return node
        try:
            converted = str(value)
        except UnicodeError:
            # On Python 2, str() of a unicode value containing non-ASCII
            # text raises UnicodeEncodeError. Treat such a value as a
            # non-match instead of aborting the whole search.
            continue
        if converted == nodeval:
            return node
    return None

def SetNode(node,val):
    """Replace the contents of *node* by assigning *val* to ``innerHTML``.

    *val* is stored as given; nothing is returned.
    """
    node.innerHTML = val

if __name__=="__main__":
    # Self-test / demo: build a small DOM in a blank page and read it back
    # using the helpers defined in this module.
    url = "about:blank"
    myie = NewIE(url)
    # Toggle visibility (presumably a freshly created instance starts
    # hidden, so this shows the window -- confirm on the target system).
    Visible(myie)
    mybody = GetBody(myie)

    # Three sibling <div> elements become the body content.
    SetNode(mybody, "<div>Genius</div>" * 3)
    coldiv = GetNodes(mybody, "div")
    # Fill the first <div> with ten paragraphs and tag it with an id so it
    # can be found again through NodeByAttr.
    SetNode(coldiv[0], "<p>Hello</p>" * 10)
    coldiv[0].SetAttribute("id", "test")
    div_id_test = NodeByAttr(coldiv, "id", "test")
    div_p = GetNodes(div_id_test, "p")
    for div_per_p in div_p:
        # Single-argument call form: prints the same on Python 2 and 3,
        # whereas the bare print statement is a SyntaxError on Python 3.
        print(div_per_p.innerHTML)

然后就可以调用它了,实现插入HTML的功能:

#-*- coding:utf-8 -*-
import ie

# Blog page to drive. openIE reuses a window already located at this URL,
# otherwise it starts a new browser instance.
blog_url = "http://hi.baidu.com/mirguest/creat/blog/"

browser = ie.openIE(blog_url)
#ie.Visible(browser)
page_body = ie.GetBody(browser)

# Title: supply a default only when the title field is currently empty.
input_nodes = ie.GetNodes(page_body, "input")
title_input = ie.NodeByAttr(input_nodes, "id", "spBlogTitle")
if len(title_input.value) == 0:
    title_input.value = "Test_Generated_By_Python"

# Category: selected by index. Note that the index starts at 0.
select_nodes = ie.GetNodes(page_body, "select")
category_select = ie.NodeByAttr(select_nodes, "id", "spBlogCatName")
category_select.selectedIndex = 2

# Editor: the editable content lives in the document inside this iframe.
iframe_nodes = ie.GetNodes(page_body, "iframe")
editor_iframe = ie.NodeByAttr(iframe_nodes, "id", "tangram_editor_iframe_TANGRAM__1")
editor_iframe.click()
editor_body = editor_iframe.contentWindow.Document.body
# Optional: read the HTML to insert from a source file instead.
#f=open(r"..\myhtml.html","r")
#content=f.read().decode("utf-8")
#f.close()
# Append the new HTML after whatever the editor already contains.
content = editor_body.innerHTML + "<div class='line'></div>"
ie.SetNode(editor_body, content)

# Submit (left disabled): click the first child of the "btn-box" div.
#div_nodes=ie.GetNodes(page_body,"div")
#button_box=ie.NodeByAttr(div_nodes,"id","btn-box")
#submitbtn=button_box.childNodes(0)
#submitbtn.click()
大家可以试试。其实原理很简单:先获取 InternetExplorer 对象,然后对其中加载的文档(HTML DOM)进行处理。

关于 HTML DOM 可参考 http://www.w3school.com.cn/htmldom/index.asp 。

另外,IE(InternetExplorer 对象)的接口文档可参考 http://msdn.microsoft.com/en-us/library/aa752084(VS.85).aspx 。

阅读全文
类别:Python 查看评论
文章来源:http://hi.baidu.com/mirguest/blog/item/b42886f997fe1f77034f5634.html

posted on 2011-02-10 11:52 mirguest 阅读(846) 评论(0)  编辑 收藏 引用


只有注册用户登录后才能发表评论。
网站导航: 博客园   IT新闻   BlogJava   博问   Chat2DB   管理