首先导入需要用到的包:
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.io.*;
然后建立一个DocumentBuilder对象
DocumentBuilderFactory docBuilderFactory=DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder=docBuilderFactory.newDocumentBuilder();
DocumentBuilder的parse方法可以解析一个XML文件,并返回一个Document类型的对象
Document document=docBuilder.parse(xmlFile);
之后就可以通过Document提供的方法进行访问了
假设有一个XML格式为
<entailment-corpus>
<pair id="001" contradiction="YES" type="negation">
<t>Tariq Aziz was not considered a member of Saddam's innermost circle.</t>
<h>Tariq Aziz was in Saddam's inner circle.</h>
</pair>
<pair id="002" contradiction="YES" type="lexical">
<t>Tariq Aziz kept outside the closed circle of Saddam's Sunni Moslem cronies.</t>
<h>Tariq Aziz was in Saddam's inner circle.</h>
</pair>
</entailment-corpus>
下面的程序把该XML修改为
<entailment-corpus>
<pair id="001" contradiction="YES" type="negation">
<t>Tariq Aziz was not considered a member of Saddam's innermost circle.</t>
<h>Tariq Aziz was in Saddam's inner circle.</h>
<addElement1>addText1</addElement1>
<addElement2>addText2</addElement2>
</pair>
<pair id="002" contradiction="YES" type="lexical">
<t>Tariq Aziz kept outside the closed circle of Saddam's Sunni Moslem cronies.</t>
<h>Tariq Aziz was in Saddam's inner circle.</h>
<addElement1>addText1</addElement1>
<addElement2>addText2</addElement2>
</pair>
</entailment-corpus>
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.io.*;
import java.util.*;
/**
 * Demonstrates reading and modifying an XML document with the DOM API.
 *
 * <p>For every {@code <pair>} element below the document root, the text of
 * each child element is printed to standard output and two new child
 * elements ({@code <addElement1>} and {@code <addElement2>}) are appended.
 * The modified document is then serialized to a file.
 */
public class TestDom {

    /** Output location used by {@link #modifyXML(File)}. */
    private static final String DEFAULT_OUTPUT_PATH = "E:/我的文档/tmp/modify.xml";

    /**
     * Modifies the given XML file and writes the result to the default
     * output path.
     *
     * @param xmlFile the XML file to read
     */
    public void modifyXML(File xmlFile) {
        modifyXML(xmlFile, new File(DEFAULT_OUTPUT_PATH));
    }

    /**
     * Modifies the given XML file and writes the result to {@code outputFile}.
     *
     * <p>Parsing, I/O and serialization failures are reported on standard
     * error via {@code printStackTrace()} and are not propagated.
     *
     * @param xmlFile    the XML file to read
     * @param outputFile the file the modified document is written to
     */
    public void modifyXML(File xmlFile, File outputFile) {
        try {
            DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
            Document document = docBuilder.parse(xmlFile);
            Element root = document.getDocumentElement();

            NodeList pairNodeList = root.getElementsByTagName("pair");
            for (int i = 0; i < pairNodeList.getLength(); i++) {
                Node pairNode = pairNodeList.item(i); // a <pair> element
                printChildElementText(pairNode);
                appendTextElement(document, pairNode, "addElement1", "addText1");
                appendTextElement(document, pairNode, "addElement2", "addText2");
            }

            writeDocument(document, outputFile);
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the trimmed text content of every child element of {@code parent}.
     */
    private static void printChildElementText(Node parent) {
        // getChildNodes() also returns the whitespace text nodes that sit
        // between the child elements, so the length is not simply the number
        // of child elements.
        NodeList pairChild = parent.getChildNodes();
        for (int j = 0; j < pairChild.getLength(); j++) {
            Node pairChildNode = pairChild.item(j);
            // Skip whitespace text nodes, comments, etc.
            if (pairChildNode instanceof Element) {
                // getTextContent() is used instead of casting getFirstChild()
                // to Text: the first child is null for an empty element and
                // may be a comment or a nested element rather than a Text node.
                String text = pairChildNode.getTextContent();
                System.out.println(text == null ? "" : text.trim());
            }
        }
    }

    /**
     * Creates an element named {@code tagName} containing {@code text} and
     * appends it as the last child of {@code parent}.
     */
    private static void appendTextElement(Document document, Node parent,
                                          String tagName, String text) {
        Element element = document.createElement(tagName);
        element.setTextContent(text);
        parent.appendChild(element);
    }

    /**
     * Serializes {@code document} to {@code outputFile}.
     */
    private static void writeDocument(Document document, File outputFile)
            throws IOException, TransformerException {
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer();
        // Write through an explicitly managed stream so the file handle is
        // always released, even when the transformation fails.
        OutputStream out = new FileOutputStream(outputFile);
        try {
            transformer.transform(new DOMSource(document), new StreamResult(out));
        } finally {
            out.close();
        }
    }

    /**
     * Runs the demo against a fixed sample file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String path = "E:/我的文档/tmp/数据/real_contradiction.xml";
        TestDom testDom = new TestDom();
        testDom.modifyXML(new File(path));
    }
}
注意:getChildNodes()会把元素之间的空白(换行、缩进等)也当做文本节点(Text Node)返回,因此在程序中要判断pairChildNode是不是instanceof Element。
如果是Element,就可以读取其中的文本内容了。