先来张Picture,展示一下使用TinyHtmlParser解析的结果。
![](http://www.cppblog.com/images/cppblog_com/codejie/LingosHook/LingosHook_html.png)
处理HTML果然比TEXT简单、清晰多了。如下两个函数,就可以分解出结果,这也说明了,Lingoes的结果还是很有规律的,嘿嘿。。。
![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
int CViconECDictResultParser::ParserHTML(const wxString &html, CDBAccess::TRecordDataVector &vct) const
![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedBlock.gif)
{
std::wstring str(html.begin(), html.end());
![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
TinyHtmlParser::CDocumentObject doc;
if(doc.Load(str) != 0)
return -1;
![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
vct.clear();
![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
std::wstring body(ID.begin(), ID.end());
body = L"dict_body_" + body;
const TinyHtmlParser::CElementObject* pe = doc.FindFirstElement(L"DIV");
while(pe != NULL)
![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
const TinyHtmlParser::CAttributeObject* pa = pe->FindAttribute(L"id");
if(pa != NULL && pa->value == body)
![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
TinyHtmlParser::CDocumentObject::TElementStack tmpstack;
const TinyHtmlParser::CElementObject* pr = doc.FindFirstElement(pe, L"DIV", tmpstack);
while(pr != NULL)
![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
pa = pr->FindAttribute(L"style");
if(pa != NULL && pa->value == L"\"MARGIN: 5px 0px\"")
![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
CDBAccess::TRecordData rec;
if(GetRecord(&doc, pr, rec) != 0)
return -1;
rec.m_strHTML = html;
vct.push_back(rec);
}
pr = doc.FindNextElement(pe, L"DIV", tmpstack);
}
}
pe = doc.FindNextElement();
}
![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
return 0;
}
![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
// Fills `rec` from one entry block of the parsed document.
//
// doc : document that owns `pr`; used to enumerate the DIVs nested in `pr`.
// pr  : the entry element.
// rec : receives the word, its phonetic symbol and the list of results.
//
// Returns 0 on success, -1 as soon as a nested DIV has no style attribute or
// does not have the child structure expected for its style.
int CViconECDictResultParser::GetRecord(TinyHtmlParser::CDocumentObject* doc, const TinyHtmlParser::CElementObject* pr, CDBAccess::TRecordData& rec) const
{
    CDBAccess::TResultVector vct;

    TinyHtmlParser::CDocumentObject::TElementStack walk;
    for(const TinyHtmlParser::CElementObject* p = doc->FindFirstElement(pr, L"DIV", walk);
        p != NULL;
        p = doc->FindNextElement(pr, L"DIV", walk))
    {
        const TinyHtmlParser::CAttributeObject* style = p->FindAttribute(L"style");
        if(style == NULL)
            return -1;

        if(style->value == L"\"MARGIN: 0px 0px 5px; COLOR: #808080; LINE-HEIGHT: normal\"")
        {
            // Header line: first child holds the word, its sibling the symbol.
            const TinyHtmlParser::CElementObject* head = p->child;
            if(head == NULL)
                return -1;

            const TinyHtmlParser::CElementObject* word = head->child;
            if(word == NULL || word->type != TinyHtmlParser::ET_ELEMENT)
                return -1;
            rec.m_strWord = wxString(word->value.c_str(), wxConvISO8859_1);

            const TinyHtmlParser::CElementObject* next = head->sibling;
            if(next == NULL || next->child == NULL || next->child->type != TinyHtmlParser::ET_ELEMENT)
                return -1;
            rec.m_strSymbol = wxString(next->child->value.c_str(), wxConvISO8859_1);
        }
        else if(style->value == L"\"MARGIN: 0px 0px 5px\"")
        {
            // Result line: the child's own text is the result, the grandchild's
            // text is the word class.
            CDBAccess::TResultPair result;

            const TinyHtmlParser::CElementObject* text = p->child;
            if(text == NULL || text->type != TinyHtmlParser::ET_ELEMENT)
                return -1;
            result.second = wxString(text->value.c_str(), wxConvISO8859_1);
            result.second.Trim(false);

            const TinyHtmlParser::CElementObject* kind = text->child;
            if(kind == NULL || kind->type != TinyHtmlParser::ET_ELEMENT)
                return -1;
            result.first = StrToWC(wxString(kind->value.c_str(), wxConvISO8859_1));
            vct.push_back(result);
        }
    }

    rec.m_vctResult.push_back(std::make_pair(TITLE, vct));

    return 0;
}
![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
将TinyHtmlParser合成到LingosHook里面才发现一个大问题--中文处理失败。因为整个解析过程全部都是用std::string,而不是std::wstring,这样导致在字符串分解过程中会丢失宽字符信息。于是--改,将std::string全部换成std::wstring,整个过程比我想象的简单,半小时搞定,怎么说呢,STL真好。。。
下面是TinyHtmlParser的代码,不长,下次再做实现说明,不过,常言道--“代码在手,天下我有”。。。
1
#ifndef __TINYHTMLPARSER_H__
2
#define __TINYHTMLPARSER_H__
3![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
4
#include <cstddef>
#include <cwctype>
#include <deque>
#include <iostream>
#include <memory>
#include <queue>
#include <stack>
#include <string>
#include <vector>
8![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
9
namespace TinyHtmlParser
10![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedBlock.gif)
{
11![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
12![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
// Kind of a parsed element.
enum ElementType
{
    ET_UNKNOWN = -1, // not classified
    ET_TAG = 0,      // just a tag
    ET_NODE,         // no value
    ET_ELEMENT       // has a value
};
13![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
14
// One attribute of an element: a name/value pair that also serves as a node
// of a singly linked list (see `next`).
class CAttributeObject
{
public:
    // Stores copies of the name `a` and the value `v`; the node starts unlinked.
    CAttributeObject(const std::wstring& a, const std::wstring& v)
        : attr(a), value(v), next(NULL)
    {
    }

    virtual ~CAttributeObject() {}

    // Writes the attribute to `os`.
    void Show(std::wostream& os) const;

public:
    std::wstring attr;      // attribute name
    std::wstring value;     // attribute value
    CAttributeObject* next; // following attribute in the list, NULL at the end
};
29![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
30
class CElementObject
31![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
32
public:
33
CElementObject();
34
virtual ~CElementObject();
35![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
36
virtual int Analyse();
37![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
38
const CAttributeObject* FindAttribute(const std::wstring& attr) const;
39![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
40
void Show(std::wostream& os) const;
41
protected:
42
int AnalyseAttribute(const std::wstring& attr);
43
int MakeAttribute(const std::wstring& attr);
44
int MakeAttribute(const std::wstring& attr, const std::wstring& value);
45
void FreeAnalyseAttribute();
46
int AnalyseValue();
47
public:
48
ElementType type;
49
size_t level;
50
CElementObject* parent;
51
CElementObject* child;
52
CElementObject* sibling;
53![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
54
CAttributeObject* attrib;
55
public:
56
std::wstring tag;
57
std::wstring value;
58
};
59![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
60
// Position record produced while scanning the HTML text: the kind of token
// and the character offsets it spans.
class CParserData
{
public:
    enum DataType
    { DT_UNKNOWN = -1, DT_TAG = 0, DT_VALUE, DT_END, DT_DONE, DT_TAG_VALUE };

public:
    // Every offset is zero-initialised. Default-constructed objects are copied
    // around (for instance as members of TNodeData), so no field may be left
    // indeterminate.
    CParserData()
        : type(DT_UNKNOWN)
        , start(0)
        , end(0)
        , vstart(0)
        , vend(0)
    {
    }

    virtual ~CParserData() {}

public:
    DataType type;
    size_t start;  // offset where the token starts
    size_t end;    // offset where the token ends
    size_t vstart; // start offset of the text attached to a DT_TAG_VALUE tag
    size_t vend;   // end offset of the text attached to a DT_TAG_VALUE tag
};
79![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
80
class CDocumentObject
81![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
82
protected:
83
static const wchar_t TAG_LT = L'<';
84
static const wchar_t TAG_GT = L'>';
85
static const wchar_t TAG_SLASH = L'/';
86
static const wchar_t TAG_BSLASH = L'\\';
87
static const wchar_t TAG_AND = L'&';
88![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
89
typedef std::vector<CParserData> TDataVector;
90![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
91
typedef std::stack<CParserData> TDataStack;
92
struct TNodeData
93![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
94
size_t level;
95
CParserData tag;
96
CParserData value;
97
// CParserData end;
98
};
99
typedef std::deque<TNodeData> TNodeQueue;
100
public:
101
typedef std::stack<const CElementObject*> TElementStack;
102
public:
103
CDocumentObject();
104
virtual ~CDocumentObject();
105![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
106
int Load(const std::wstring& str);
107![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
108
const CElementObject* Root() const;
109![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
110
const CElementObject* FindFirstElement(const std::wstring& tag);
111
const CElementObject* FindNextElement();
112![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
113
const CElementObject* FindFirstElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack);
114
const CElementObject* FindNextElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack);
115![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
116
const CAttributeObject* FindAttribute(const CElementObject* element, const std::wstring& attr);
117
118
void Show(std::wostream& os) const;
119
protected:
120
int PreProcess(const std::wstring& str, std::wstring& html);
121
int PreParser(const std::wstring& html, TNodeQueue& vct);
122
int Parser(const std::wstring& html, TNodeQueue& que);
123
private:
124
int PreParserLT(const std::wstring& html, std::wstring::size_type& pos, CParserData& data);
125
int PushValueData(const CParserData& data, TDataStack& datastack) const;
126
int PushTagData(const std::wstring& html, const CParserData& data, TDataStack& datatstack, TNodeQueue& nodeque) const;
127
128
int CheckSpecialTag(const std::wstring& html, const CParserData& data) const;
129
int CheckTag(const std::wstring& html, const CParserData& tag, const CParserData& end) const;
130
CElementObject* MakeElement(const std::wstring& html, const TNodeData& node, CElementObject* parent, CElementObject* sibling) const;
131![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
132
void CDocumentObject::ShowElement(std::wostream& os, const CElementObject* e) const;
133![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
134
void FreeElement(CElementObject* root);
135![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
136
const CElementObject* FindElement(const CElementObject* root, const CElementObject* pe, const std::wstring& tag, TElementStack& stack);
137
private:
138
CElementObject* _root;
139
private:
140
std::wstring _findtag;
141
TElementStack _findstack;
142
};
143![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
144
}
145![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
146
#endif
147![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
1![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
2
#include "TinyHtmlParser.h"
3![](http://www.cppblog.com/Images/OutliningIndicators/None.gif)
4
namespace TinyHtmlParser
5![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedBlock.gif)
{
6![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
7
void CAttributeObject::Show(std::wostream& os) const
8![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
9
os << " attr : " << this->attr << " -- value = " << this->value << std::endl;
10
}
11![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
12
// Creates an unclassified, unlinked element at level 0 without attributes.
CElementObject::CElementObject()
    : type(ET_UNKNOWN), level(0)
    , parent(NULL), child(NULL), sibling(NULL)
    , attrib(NULL)
{
    // tag and value are default-constructed, i.e. empty.
}
21![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
22
// Releases the attribute list; the parent/child/sibling links are not touched here.
CElementObject::~CElementObject()
{
    FreeAnalyseAttribute();
}
26![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
27
int CElementObject::Analyse()
28![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
29
std::wstring str = tag;
30![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
31
std::wstring::size_type pos = str.find(L" ");
32
if(pos != std::wstring::npos)
33![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
34
tag = str.substr(0, pos);
35![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
36
str = str.substr(pos + 1);
37
if(AnalyseAttribute(str) != 0)
38![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
39
return -1;
40
}
41
}
42
if(type == ET_ELEMENT)
43![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
44
if(AnalyseValue() != 0)
45
return -1;
46
}
47
return 0;
48
}
49![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
50
int CElementObject::AnalyseAttribute(const std::wstring& attr)
51![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
52
if(attr.size() == 0)
53
return 0;
54![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
55
std::wstring a, v;
56
std::wstring::size_type pos = attr.find(L"="), start = 0;
57
while(pos != std::wstring::npos)
58![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
59
a = attr.substr(start, pos - start);
60
if(pos == attr.size() - 1)
61
return -1;
62
start = pos + 1;
63
if(attr[pos + 1] == L'\"')
64![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
65
pos = attr.find(L"\"", start + 1);
66
if(pos == std::wstring::npos)
67
return -1;
68
v = attr.substr(start, pos - start + 1);
69
start = pos + 2;
70
}
71
else
72![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
73
pos = attr.find(L" ", start);
74
if(pos == std::wstring::npos)
75
pos = attr.size();
76
v = attr.substr(start, pos - start);
77
start = pos + 1;
78
}
79
if(MakeAttribute(a, v) != 0)
80
return -1;
81![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
82
if(start >= attr.size())
83
break;
84![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
85
pos = attr.find(L"=", start);
86
}
87
return 0;
88
}
89![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
90
int CElementObject::MakeAttribute(const std::wstring &attr)
91![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
92
std::wstring::size_type pos = attr.find(L"=");
93
if(pos == std::wstring::npos)
94
return -1;
95![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
96
return MakeAttribute(attr.substr(0, pos), attr.substr(pos));
97
}
98![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
99
int CElementObject::MakeAttribute(const std::wstring &attr, const std::wstring& value)
100![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
101
std::auto_ptr<CAttributeObject> obj(new CAttributeObject(attr, value));//attr.substr(0, pos), attr.substr(pos)));
102
103
if(attrib != NULL)
104![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
105
CAttributeObject* tmp = attrib;
106
while(tmp->next != NULL)
107
tmp = tmp->next;
108
tmp->next = obj.release();
109
}
110
else
111![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
112
attrib = obj.release();
113
}
114
return 0;
115
}
116![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
117![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
118
void CElementObject::FreeAnalyseAttribute()
119![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
120
CAttributeObject* tmp = attrib;
121
while(attrib != NULL)
122![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
123
tmp = attrib->next;
124
delete attrib;
125
attrib = tmp;
126
}
127![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
128
}
129![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
130
int CElementObject::AnalyseValue()
131![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
132
std::wstring::size_type pos = this->value.find(L" ");
133
while(pos != std::wstring::npos)
134![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
135
this->value.replace(pos, 6, L" ");
136
pos = this->value.find(L" ", pos + 1);
137
}
138![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
139
return 0;
140
}
141![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
142
// Returns the first attribute whose name equals `attr` exactly, or NULL when
// the element has no such attribute.
const CAttributeObject* CElementObject::FindAttribute(const std::wstring& attr) const
{
    for(const CAttributeObject* item = this->attrib; item != NULL; item = item->next)
    {
        if(item->attr == attr)
            return item;
    }
    return NULL;
}
153![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
154
void CElementObject::Show(std::wostream& os) const
155![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
156
os << "[" << this->level << "]" << "Tag : " << this->tag;
157
if(this->type == ET_ELEMENT)
158
os << " -- value = " << this->value;
159
os << std::endl;
160![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
161
const CAttributeObject* attr = this->attrib;
162
while(attr != NULL)
163![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
164
attr->Show(os);
165
attr = attr->next;
166
}
167
os << std::endl;
168
}
169
//
170![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
171
// Creates an empty document: no element tree is attached yet.
CDocumentObject::CDocumentObject() : _root(NULL)
{
}
175![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
176
// Hands the element tree, if there is one, to FreeElement().
CDocumentObject::~CDocumentObject()
{
    if(_root == NULL)
        return;

    FreeElement(_root);
}
181![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
182
int CDocumentObject::Load(const std::wstring &str)
183![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
184
std::wstring html;
185
if(PreProcess(str, html) != 0)
186
return -1;
187
TNodeQueue que;
188
if(PreParser(html, que) != 0)
189
return -1;
190
if(Parser(html, que) != 0)
191
return -1;
192
return 0;
193
}
194![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
195
int CDocumentObject::PreProcess(const std::wstring& str, std::wstring& html)
196![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
197
bool tag = false;
198
for(std::wstring::const_iterator it = str.begin(); it != str.end(); ++ it)
199![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
200
if(*it == TAG_LT)
201![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
202
if(tag == true)
203
return -1;
204
tag = true;
205
}
206
else if(*it == TAG_GT)
207![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
208
if(tag == false)
209
return -1;
210
tag = false;
211
}
212
else
213![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
214
if(tag == false)
215![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
216
if(isspace((unsigned char)*it) != 0)
217
continue;
218
}
219
}
220
html += *it;
221
}
222![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
223
return 0;
224
}
225![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
226
int CDocumentObject::PreParser(const std::wstring& html, CDocumentObject::TNodeQueue& que)
227![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
228
std::wstring::size_type pos = 0;
229![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
230
if(html.size() == 0)
231
return -1;
232
if(html[pos] != TAG_LT)
233
return -1;
234![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
235
TDataStack datastack;
236![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
237
CParserData data;
238![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
239
while(pos < html.size())
240![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
241
if(html[pos] == TAG_LT)
242![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
243
if(pos > data.start)
244![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
245
data.type = CParserData::DT_VALUE;
246
data.end = pos;
247![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
248
// std::cout << "VALUE - " << html.substr(data.start, data.end - data.start) << std::endl;
249
if(PushValueData(data, datastack) != 0)
250
return -1;
251
}
252![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
253
if(PreParserLT(html, pos, data) != 0)
254
return -1;
255
// std::cout << "TAG - " << html.substr(data.start, data.end - data.start) << std::endl;
256
if(PushTagData(html, data, datastack, que) != 0)
257
return -1;
258![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
259
++ pos;
260
data.start = pos;
261
}
262
//else if(html[pos] == TAG_GT || html[pos] == TAG_SLASH)
263
//{
264
// return -1;
265
//}
266
else
267![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
268
++ pos;
269
}
270
// std::cout << (char)html[pos] << std::endl;
271
}
272![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
273
return 0;
274
}
275![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
276
int CDocumentObject::Parser(const std::wstring& html, CDocumentObject::TNodeQueue& que)
277![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
278
CElementObject *pe = NULL, *pp = NULL, *ps = NULL;
279
size_t level = 0;
280
while(que.size()> 0)
281![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
282
const TNodeData &node = que.front();
283
if(level < que.front().level)
284![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
285
pp = pe;
286
ps = NULL;
287
}
288
else if(level == que.front().level)
289![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
290
ps = pe;
291
}
292
else//>
293![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
294
ps = pe;
295
pp = pe->parent;
296
int t = level - que.front().level;
297
while(t > 0)
298![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
299
ps = ps->parent;
300
pp = pp->parent;
301
-- t;
302
}
303
}
304
level = que.front().level;
305![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
306
pe = MakeElement(html, que.front(), pp, ps);
307![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
308
if(pe == NULL)
309
return -1;
310![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
311
que.pop_front();
312
}
313![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
314
if(pp != NULL)
315![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
316
while(pp->parent != NULL)
317
pp = pp->parent;
318
_root = pp;
319
}
320
else
321
_root = pe;
322![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
323
return 0;
324
}
325![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
326
int CDocumentObject::PreParserLT(const std::wstring& html, std::wstring::size_type& pos, CParserData& data)
327![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
328
if(pos == html.size() - 1)
329
return -1;
330![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
331
data.start = pos;
332![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
333
++ pos;
334![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
335
if(html[pos] != TAG_SLASH)
336![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
337
data.type = CParserData::DT_TAG;
338
}
339
else
340![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
341
data.type = CParserData::DT_END;
342
++ pos;
343
}
344![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
345
while(pos < html.size())
346![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
347
if(html[pos] == TAG_GT)
348![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
349
if(html[pos - 1] == TAG_SLASH)
350![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
351
data.type = CParserData::DT_DONE;
352
}
353![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
354
data.end = pos;
355
356
return 0;
357
}
358
else if(html[pos] == TAG_LT)
359![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
360
return -1;
361
}
362![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
363
++ pos;
364
}
365![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
366
return -1;
367
}
368![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
369
// Pushes a value token onto the scanner stack. A value is only accepted when
// the stack already holds something. Returns 0 on success, -1 otherwise.
int CDocumentObject::PushValueData(const TinyHtmlParser::CParserData &data, CDocumentObject::TDataStack &datastack) const
{
    if(datastack.empty())
        return -1;

    datastack.push(data);
    return 0;
}
376![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
377
int CDocumentObject::PushTagData(const std::wstring& html, const CParserData& data, CDocumentObject::TDataStack& datastack, CDocumentObject::TNodeQueue& nodeque) const
378![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
379
if(data.type == CParserData::DT_TAG)
380![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
381
if(CheckSpecialTag(html, data) == 0)
382![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
383
TNodeData node;
384
node.tag = data;
385![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
386
node.level = datastack.size();
387
nodeque.push_front(node);
388
return 0;
389
}
390![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
391
if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
392![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
393
CParserData data = datastack.top();
394
datastack.pop();
395
if(datastack.top().type != CParserData::DT_TAG)
396
return -1;
397
datastack.top().type = CParserData::DT_TAG_VALUE;
398
datastack.top().vstart = data.start;
399
datastack.top().vend = data.end;
400
}
401![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
402
datastack.push(data);
403
}
404
else if(data.type == CParserData::DT_END)
405![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
406
if(datastack.size() == 0)
407
return -1;
408![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
409
TNodeData node;
410
if(datastack.top().type == CParserData::DT_TAG || datastack.top().type == CParserData::DT_TAG_VALUE)
411![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
412
node.tag = datastack.top();
413
datastack.pop();
414
}
415
else if(datastack.top().type == CParserData::DT_VALUE)
416![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
417
node.value = datastack.top();
418![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
419
// std::cout << "value - " << html.substr(node.value.start, node.value.end - node.value.start) << std::endl;
420![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
421
datastack.pop();
422
423
if(datastack.size() == 0)
424
return -1;
425![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
426
if(datastack.top().type == CParserData::DT_TAG)
427![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
428
node.tag = datastack.top();
429
}
430
else if(datastack.top().type == CParserData::DT_TAG_VALUE)
431![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
432
node.tag = datastack.top();
433
}
434
else
435![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
436
return -1;
437
}
438![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
439
//node.tag = datastack.top();
440
//else if(datastack.top().type == CParserData::DT_TAG_VALUE)
441
//{
442
// node.tag = datastack.top();
443
//}
444
datastack.pop();
445
}
446
else
447![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
448
// std::cout << "type : " << datastack.top().type << std::endl;
449
return -1;
450
}
451![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
452
if(CheckTag(html, node.tag, data) != 0)
453
return -1;
454![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
455
node.level = datastack.size();
456
nodeque.push_front(node);
457
}
458
else if(data.type == CParserData::DT_DONE)
459![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
460
if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
461![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
462
CParserData data = datastack.top();
463
datastack.pop();
464
if(datastack.top().type != CParserData::DT_TAG)
465
return -1;
466
datastack.top().type = CParserData::DT_TAG_VALUE;
467
datastack.top().vstart = data.start;
468
datastack.top().vend = data.end;
469
}
470![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
471
// datastack.push(data);
472![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
473
TNodeData node;
474
node.tag = data;
475![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
476
node.level = datastack.size();
477
nodeque.push_front(node);
478
}
479
else
480![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
481
return -1;
482
}
483
return 0;
484
}
485![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
486
int CDocumentObject::CheckSpecialTag(const std::wstring& html, const CParserData& data) const
487![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
488
std::wstring tag = html.substr(data.start + 1, data.end - data.start - 1);
489
std::wstring::size_type pos = tag.find(L" ");
490
if(pos != std::wstring::npos)
491
tag = tag.substr(0, pos);
492
493
if(tag == L"IMG")
494
return 0;
495![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
496
return -1;
497
}
498![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
499
int CDocumentObject::CheckTag(const std::wstring& html, const CParserData& tag, const CParserData& end) const
500![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
501
std::wstring str = html.substr(tag.start + 1, tag.end - tag.start - 1);
502
std::wstring::size_type pos = str.find(L" ");
503
if(pos != std::wstring::npos)
504
str = str.substr(0, pos);
505
506
if(str != html.substr(end.start + 2, end.end - end.start - 2))
507![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
508
// std::cout << "tag : " << str << " -- end : " << html.substr(end.start + 2, end.end - end.start - 2) << std::endl;
509
return -1;
510
}
511
return 0;
512
}
513![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
514
// Builds a CElementObject from one parsed node and links it into the tree.
//
// html    - the complete document text the node's ranges index into.
// node    - parsed tag (and optional value) ranges plus nesting level.
// parent  - element that will own the new one as its child; may be NULL.
//           When non-NULL its `child` link is overwritten.
// sibling - element stored as the new element's `sibling` link; may be NULL.
//
// Returns the newly allocated element (the caller takes ownership), or NULL
// when the tag type is not DT_TAG / DT_DONE / DT_TAG_VALUE or when
// CElementObject::Analyse() reports a failure. On the NULL paths the
// partially built element is destroyed by the auto_ptr.
CElementObject* CDocumentObject::MakeElement(const std::wstring& html, const CDocumentObject::TNodeData &node, CElementObject *parent, CElementObject *sibling) const
{
    std::auto_ptr<CElementObject> ele(new CElementObject);

    ele->level = node.level;

    if(node.tag.type == CParserData::DT_TAG || node.tag.type == CParserData::DT_TAG_VALUE)
    {
        // Fix: the DT_TAG_VALUE branch used to assign ET_NODE to `tag`
        // instead of `type`, leaving the element type unset.
        ele->type = ET_NODE;
        ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 1);
    }
    else if(node.tag.type == CParserData::DT_DONE)
    {
        ele->type = ET_TAG;
        // One character shorter than above (presumably to drop the '/' of
        // a self-closing "<.../>" tag - confirm with the parser).
        ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 2);
    }
    else
    {
        return NULL;
    }

    if(node.value.type == CParserData::DT_VALUE)
    {
        ele->type = ET_ELEMENT;
        if(node.tag.type == CParserData::DT_TAG)
        {
            ele->value = html.substr(node.value.start, node.value.end - node.value.start);
        }
        else
        {
            // The tag carries its own value range (vstart/vend); both
            // pieces are kept, joined by a '%' separator.
            ele->value = html.substr(node.tag.vstart, node.tag.vend - node.tag.vstart) + L"%" + html.substr(node.value.start, node.value.end - node.value.start);
        }
    }

    if(ele->Analyse() != 0)
    {
        return NULL;
    }

    // Link into the tree only once the element is known to be valid.
    if(parent != NULL)
        parent->child = ele.get();
    ele->parent = parent;
    ele->sibling = sibling;

    return ele.release();
}
561![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
562
void CDocumentObject::Show(std::wostream &os) const
563![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
564
if(_root != NULL)
565
ShowElement(os, _root);
566
}
567![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
568
void CDocumentObject::ShowElement(std::wostream& os, const CElementObject* e) const
569![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![](http://www.cppblog.com/Images/OutliningIndicators/ContractedSubBlock.gif)
{
570
const CElementObject* pe = e, *ps = e->sibling;
571![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
572
pe->Show(os);
573
574
pe = pe->child;
575
if(pe != NULL)
576![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
577
ShowElement(os, pe);
578
}
579
if(ps != NULL)
580![](http://www.cppblog.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
{
581
ShowElement(os, ps);
582
}
583
}
584![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
585
// Destroys `root` together with everything reachable from it through the
// `child` and `sibling` links. Passing NULL is allowed and does nothing.
//
// Fixes over the previous version:
//  - `root` was dereferenced before it was checked against NULL;
//  - elements are created with `new` in MakeElement(), so they must be
//    released with `delete` (not free()), which also runs the destructor.
void CDocumentObject::FreeElement(CElementObject* root)
{
    while(root != NULL)
    {
        // Read the links first: they are gone once the element is deleted.
        CElementObject* const child = root->child;
        CElementObject* const next = root->sibling;

        delete root;

        if(child != NULL)
        {
            FreeElement(child);
        }

        // Walk the sibling chain iteratively instead of recursing.
        root = next;
    }
}
606![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
607
// Starts a document-wide search for elements whose tag equals `tag`.
// The tag and the traversal stack are stored in the object so that
// FindNextElement() can continue from the returned match.
//
// Returns the first matching element, or NULL when the document is empty
// or nothing matches. With an empty document the stored search state is
// left untouched.
const CElementObject* CDocumentObject::FindFirstElement(const std::wstring &tag)
{
    if(_root != NULL)
    {
        _findtag = tag;

        // Drop whatever a previous search left behind.
        while(_findstack.empty() == false)
        {
            _findstack.pop();
        }

        return FindElement(NULL, _root, _findtag, _findstack);
    }

    return NULL;
}
618![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
619
// Continues the search begun by FindFirstElement(tag), resuming at the
// child of the element on top of the stored traversal stack.
//
// Returns the next matching element, or NULL when the stored stack is
// empty or no further match exists.
const CElementObject* CDocumentObject::FindNextElement()
{
    if(!_findstack.empty())
    {
        const CElementObject* const resumeAt = _findstack.top()->child;
        return FindElement(NULL, resumeAt, _findtag, _findstack);
    }

    return NULL;
}
626![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
627
// Starts a search for `tag` beginning at `element`, using the
// caller-supplied `tmpstack` as traversal state. `element` itself is the
// first candidate examined.
//
// Returns the first match, or NULL when `element` is NULL (in which case
// `tmpstack` is not modified) or nothing matches.
const CElementObject* CDocumentObject::FindFirstElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack)
{
    if(element != NULL)
    {
        // Start from a clean traversal state.
        while(tmpstack.empty() == false)
        {
            tmpstack.pop();
        }

        return FindElement(element, element, tag, tmpstack);
    }

    return NULL;
}
637![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
638
// Continues a search started with FindFirstElement(element, tag, tmpstack),
// resuming at the child of the element on top of `tmpstack`.
//
// Returns the next match, or NULL when `tmpstack` is empty or no further
// match exists.
const CElementObject* CDocumentObject::FindNextElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack)
{
    if(!tmpstack.empty())
    {
        const CElementObject* const resumeAt = tmpstack.top()->child;
        return FindElement(element, resumeAt, tag, tmpstack);
    }

    return NULL;
}
645![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
646
// Core of the tag search: walks the tree starting at `pe`, going down
// through `child` links first and moving on to `sibling` links when a
// branch is exhausted.
//
// root  - element at which backtracking stops; it is never popped from
//         the stack. Pass NULL to allow backtracking until the stack is
//         empty.
// pe    - element to examine first; may be NULL, in which case the search
//         immediately backtracks using `stack`.
// tag   - tag text an element must equal to match.
// stack - traversal state; every visited element is pushed, and elements
//         are popped while backtracking. On a match the matching element
//         is on top, which lets a later call resume from there.
//
// Returns the first matching element found, or NULL when none remains.
const CElementObject* CDocumentObject::FindElement(const CElementObject* root, const CElementObject* pe, const std::wstring& tag, TElementStack& stack)
{
    const CElementObject* cursor = pe;

    for(;;)
    {
        // Descend along the child chain, recording the path.
        for(; cursor != NULL; cursor = cursor->child)
        {
            stack.push(cursor);
            if(cursor->tag == tag)
                return cursor;
        }

        // Dead end: climb back until some visited element has a sibling,
        // without ever leaving the subtree bounded by `root`.
        while(cursor == NULL && !stack.empty() && stack.top() != root)
        {
            cursor = stack.top()->sibling;
            stack.pop();
        }

        if(cursor == NULL)
            return NULL;
    }
}
667![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
668
// Looks up an attribute of `element` by name.
//
// element - element whose attribute list (linked through `next`) is
//           scanned; may be NULL.
// attr    - attribute name to match exactly.
//
// Returns the first attribute whose `attr` equals the requested name, or
// NULL when `element` is NULL or no such attribute exists.
const CAttributeObject* CDocumentObject::FindAttribute(const TinyHtmlParser::CElementObject *element, const std::wstring &attr)
{
    if(element == NULL)
        return NULL;

    for(const CAttributeObject* cur = element->attrib; cur != NULL; cur = cur->next)
    {
        if(cur->attr == attr)
            return cur;
    }

    return NULL;
}
682![](http://www.cppblog.com/Images/OutliningIndicators/InBlock.gif)
683
}