先来一张图片,展示一下使用TinyHtmlParser解析后的结果。

处理HTML果然比处理TEXT简单、清晰多了。用如下两个函数就可以分解出结果,这也说明Lingoes输出的结果还是很有规律的,嘿嘿……

int CViconECDictResultParser::ParserHTML(const wxString &html, CDBAccess::TRecordDataVector &vct) const


{
std::wstring str(html.begin(), html.end());

TinyHtmlParser::CDocumentObject doc;
if(doc.Load(str) != 0)
return -1;

vct.clear();

std::wstring body(ID.begin(), ID.end());
body = L"dict_body_" + body;
const TinyHtmlParser::CElementObject* pe = doc.FindFirstElement(L"DIV");
while(pe != NULL)

{
const TinyHtmlParser::CAttributeObject* pa = pe->FindAttribute(L"id");
if(pa != NULL && pa->value == body)

{
TinyHtmlParser::CDocumentObject::TElementStack tmpstack;
const TinyHtmlParser::CElementObject* pr = doc.FindFirstElement(pe, L"DIV", tmpstack);
while(pr != NULL)

{
pa = pr->FindAttribute(L"style");
if(pa != NULL && pa->value == L"\"MARGIN: 5px 0px\"")

{
CDBAccess::TRecordData rec;
if(GetRecord(&doc, pr, rec) != 0)
return -1;
rec.m_strHTML = html;
vct.push_back(rec);
}
pr = doc.FindNextElement(pe, L"DIV", tmpstack);
}
}
pe = doc.FindNextElement();
}

return 0;
}

// Decodes one record DIV into rec.
//
// doc : the parsed document that owns pr.
// pr  : the record element; every DIV found from it is inspected.
// rec : receives the word, the symbol and the list of results.
// Returns 0 on success, -1 when a DIV has no style attribute or the expected
// child structure is missing.
int CViconECDictResultParser::GetRecord(TinyHtmlParser::CDocumentObject* doc, const TinyHtmlParser::CElementObject* pr, CDBAccess::TRecordData& rec) const
{
    CDBAccess::TResultVector results;

    TinyHtmlParser::CDocumentObject::TElementStack walk;
    for(const TinyHtmlParser::CElementObject* div = doc->FindFirstElement(pr, L"DIV", walk); div != NULL; div = doc->FindNextElement(pr, L"DIV", walk))
    {
        const TinyHtmlParser::CAttributeObject* style = div->FindAttribute(L"style");
        if(style == NULL)
            return -1;

        if(style->value == L"\"MARGIN: 0px 0px 5px; COLOR: #808080; LINE-HEIGHT: normal\"")
        {
            // Word and symbol: the first child's child holds the word, the
            // child of the first child's sibling holds the symbol.
            const TinyHtmlParser::CElementObject* first = div->child;
            if(first == NULL)
                return -1;
            if(first->child == NULL || first->child->type != TinyHtmlParser::ET_ELEMENT)
                return -1;
            rec.m_strWord = wxString(first->child->value.c_str(), wxConvISO8859_1);

            const TinyHtmlParser::CElementObject* second = first->sibling;
            if(second == NULL || second->child == NULL || second->child->type != TinyHtmlParser::ET_ELEMENT)
                return -1;
            rec.m_strSymbol = wxString(second->child->value.c_str(), wxConvISO8859_1);
        }
        else if(style->value == L"\"MARGIN: 0px 0px 5px\"")
        {
            // Result and class: the first child holds the result text, its
            // own child holds the class.
            CDBAccess::TResultPair result;
            const TinyHtmlParser::CElementObject* first = div->child;
            if(first == NULL || first->type != TinyHtmlParser::ET_ELEMENT)
                return -1;
            result.second = wxString(first->value.c_str(),wxConvISO8859_1);
            result.second.Trim(false);

            if(first->child == NULL || first->child->type != TinyHtmlParser::ET_ELEMENT)
                return -1;
            result.first = StrToWC(wxString(first->child->value.c_str(), wxConvISO8859_1));
            results.push_back(result);
        }
    }

    rec.m_vctResult.push_back(std::make_pair(TITLE, results));

    return 0;
}

将TinyHtmlParser整合到LingosHook里面之后,才发现一个大问题——中文处理失败。原因是整个解析过程全部使用std::string而不是std::wstring,导致在字符串分解过程中丢失宽字符信息。于是动手修改,将std::string全部换成std::wstring。整个过程比我想象的简单,半小时搞定,怎么说呢,STL真好……
下面是TinyHtmlParser的代码(先是头文件TinyHtmlParser.h,然后是实现文件),不长,下次再做实现说明。不过,常言道——“代码在手,天下我有”……
1
#ifndef __TINYHTMLPARSER_H__
2
#define __TINYHTMLPARSER_H__
3
4
#include <cwctype>
#include <iostream>
#include <memory>
#include <queue>
#include <stack>
#include <string>
#include <vector>
8
9
namespace TinyHtmlParser
10

{
11
12
// Kind of a parsed element.
enum ElementType
{
    ET_UNKNOWN = -1,
    ET_TAG     = 0,   // 0: just a tag
    ET_NODE,          // 1: no value
    ET_ELEMENT        // 2: have value
};
13
14
// One attribute of an element, stored as a node of a singly linked list.
class CAttributeObject
{
public:
    // Creates an unlinked node with name a and value v.
    CAttributeObject(const std::wstring& a, const std::wstring& v)
    : attr(a)
    , value(v)
    , next(NULL)
    {
    }

    // The destructor does not touch next; the list is released by its owner.
    virtual ~CAttributeObject()
    {
    }

    // Writes this attribute to os (defined out of line).
    void Show(std::wostream& os) const;

public:
    std::wstring attr;       // attribute name
    std::wstring value;      // attribute value
    CAttributeObject* next;  // following node, NULL at the end of the list
};
29
30
class CElementObject
31

{
32
public:
33
CElementObject();
34
virtual ~CElementObject();
35
36
virtual int Analyse();
37
38
const CAttributeObject* FindAttribute(const std::wstring& attr) const;
39
40
void Show(std::wostream& os) const;
41
protected:
42
int AnalyseAttribute(const std::wstring& attr);
43
int MakeAttribute(const std::wstring& attr);
44
int MakeAttribute(const std::wstring& attr, const std::wstring& value);
45
void FreeAnalyseAttribute();
46
int AnalyseValue();
47
public:
48
ElementType type;
49
size_t level;
50
CElementObject* parent;
51
CElementObject* child;
52
CElementObject* sibling;
53
54
CAttributeObject* attrib;
55
public:
56
std::wstring tag;
57
std::wstring value;
58
};
59
60
// Position record produced while scanning the HTML text: the kind of token
// and the offsets that delimit it.
class CParserData
{
public:
    enum DataType
    { DT_UNKNOWN = -1, DT_TAG = 0, DT_VALUE, DT_END, DT_DONE, DT_TAG_VALUE };

public:
    // Every offset starts at 0. Previously only type and start were
    // initialised, so copies of a fresh object carried indeterminate values
    // in end, vstart and vend.
    CParserData()
    : type(DT_UNKNOWN)
    , start(0)
    , end(0)
    , vstart(0)
    , vend(0)
    {
    }

    virtual ~CParserData()
    {
    }

public:
    DataType type;   // kind of token
    size_t start;    // offset where the token starts
    size_t end;      // offset where the token ends
    size_t vstart;   // start of the text attached to a DT_TAG_VALUE tag
    size_t vend;     // end of the text attached to a DT_TAG_VALUE tag
};
79
80
class CDocumentObject
81

{
82
protected:
83
static const wchar_t TAG_LT = L'<';
84
static const wchar_t TAG_GT = L'>';
85
static const wchar_t TAG_SLASH = L'/';
86
static const wchar_t TAG_BSLASH = L'\\';
87
static const wchar_t TAG_AND = L'&';
88
89
typedef std::vector<CParserData> TDataVector;
90
91
typedef std::stack<CParserData> TDataStack;
92
struct TNodeData
93
{
94
size_t level;
95
CParserData tag;
96
CParserData value;
97
// CParserData end;
98
};
99
typedef std::deque<TNodeData> TNodeQueue;
100
public:
101
typedef std::stack<const CElementObject*> TElementStack;
102
public:
103
CDocumentObject();
104
virtual ~CDocumentObject();
105
106
int Load(const std::wstring& str);
107
108
const CElementObject* Root() const;
109
110
const CElementObject* FindFirstElement(const std::wstring& tag);
111
const CElementObject* FindNextElement();
112
113
const CElementObject* FindFirstElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack);
114
const CElementObject* FindNextElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack);
115
116
const CAttributeObject* FindAttribute(const CElementObject* element, const std::wstring& attr);
117
118
void Show(std::wostream& os) const;
119
protected:
120
int PreProcess(const std::wstring& str, std::wstring& html);
121
int PreParser(const std::wstring& html, TNodeQueue& vct);
122
int Parser(const std::wstring& html, TNodeQueue& que);
123
private:
124
int PreParserLT(const std::wstring& html, std::wstring::size_type& pos, CParserData& data);
125
int PushValueData(const CParserData& data, TDataStack& datastack) const;
126
int PushTagData(const std::wstring& html, const CParserData& data, TDataStack& datatstack, TNodeQueue& nodeque) const;
127
128
int CheckSpecialTag(const std::wstring& html, const CParserData& data) const;
129
int CheckTag(const std::wstring& html, const CParserData& tag, const CParserData& end) const;
130
CElementObject* MakeElement(const std::wstring& html, const TNodeData& node, CElementObject* parent, CElementObject* sibling) const;
131
132
void CDocumentObject::ShowElement(std::wostream& os, const CElementObject* e) const;
133
134
void FreeElement(CElementObject* root);
135
136
const CElementObject* FindElement(const CElementObject* root, const CElementObject* pe, const std::wstring& tag, TElementStack& stack);
137
private:
138
CElementObject* _root;
139
private:
140
std::wstring _findtag;
141
TElementStack _findstack;
142
};
143
144
}
145
146
#endif
147
1
2
#include "TinyHtmlParser.h"
3
4
namespace TinyHtmlParser
5

{
6
7
void CAttributeObject::Show(std::wostream& os) const
8

{
9
os << " attr : " << this->attr << " -- value = " << this->value << std::endl;
10
}
11
12
// Creates an unlinked element: unknown type, level 0, no parent, child or
// sibling, and an empty attribute list.
CElementObject::CElementObject()
: type(ET_UNKNOWN), level(0)
, parent(NULL), child(NULL), sibling(NULL)
, attrib(NULL)
{
}
21
22
// Releases the attribute list. parent, child and sibling are not touched.
CElementObject::~CElementObject()
{
    this->FreeAnalyseAttribute();
}
26
27
int CElementObject::Analyse()
28

{
29
std::wstring str = tag;
30
31
std::wstring::size_type pos = str.find(L" ");
32
if(pos != std::wstring::npos)
33
{
34
tag = str.substr(0, pos);
35
36
str = str.substr(pos + 1);
37
if(AnalyseAttribute(str) != 0)
38
{
39
return -1;
40
}
41
}
42
if(type == ET_ELEMENT)
43
{
44
if(AnalyseValue() != 0)
45
return -1;
46
}
47
return 0;
48
}
49
50
int CElementObject::AnalyseAttribute(const std::wstring& attr)
51

{
52
if(attr.size() == 0)
53
return 0;
54
55
std::wstring a, v;
56
std::wstring::size_type pos = attr.find(L"="), start = 0;
57
while(pos != std::wstring::npos)
58
{
59
a = attr.substr(start, pos - start);
60
if(pos == attr.size() - 1)
61
return -1;
62
start = pos + 1;
63
if(attr[pos + 1] == L'\"')
64
{
65
pos = attr.find(L"\"", start + 1);
66
if(pos == std::wstring::npos)
67
return -1;
68
v = attr.substr(start, pos - start + 1);
69
start = pos + 2;
70
}
71
else
72
{
73
pos = attr.find(L" ", start);
74
if(pos == std::wstring::npos)
75
pos = attr.size();
76
v = attr.substr(start, pos - start);
77
start = pos + 1;
78
}
79
if(MakeAttribute(a, v) != 0)
80
return -1;
81
82
if(start >= attr.size())
83
break;
84
85
pos = attr.find(L"=", start);
86
}
87
return 0;
88
}
89
90
int CElementObject::MakeAttribute(const std::wstring &attr)
91

{
92
std::wstring::size_type pos = attr.find(L"=");
93
if(pos == std::wstring::npos)
94
return -1;
95
96
return MakeAttribute(attr.substr(0, pos), attr.substr(pos));
97
}
98
99
int CElementObject::MakeAttribute(const std::wstring &attr, const std::wstring& value)
100

{
101
std::auto_ptr<CAttributeObject> obj(new CAttributeObject(attr, value));//attr.substr(0, pos), attr.substr(pos)));
102
103
if(attrib != NULL)
104
{
105
CAttributeObject* tmp = attrib;
106
while(tmp->next != NULL)
107
tmp = tmp->next;
108
tmp->next = obj.release();
109
}
110
else
111
{
112
attrib = obj.release();
113
}
114
return 0;
115
}
116
117
118
void CElementObject::FreeAnalyseAttribute()
119

{
120
CAttributeObject* tmp = attrib;
121
while(attrib != NULL)
122
{
123
tmp = attrib->next;
124
delete attrib;
125
attrib = tmp;
126
}
127
128
}
129
130
int CElementObject::AnalyseValue()
131

{
132
std::wstring::size_type pos = this->value.find(L" ");
133
while(pos != std::wstring::npos)
134
{
135
this->value.replace(pos, 6, L" ");
136
pos = this->value.find(L" ", pos + 1);
137
}
138
139
return 0;
140
}
141
142
// Returns the first attribute whose name equals attr, or NULL when the
// element has no such attribute.
const CAttributeObject* CElementObject::FindAttribute(const std::wstring& attr) const
{
    for(const CAttributeObject* node = attrib; node != NULL; node = node->next)
    {
        if(node->attr == attr)
            return node;
    }
    return NULL;
}
153
154
void CElementObject::Show(std::wostream& os) const
155

{
156
os << "[" << this->level << "]" << "Tag : " << this->tag;
157
if(this->type == ET_ELEMENT)
158
os << " -- value = " << this->value;
159
os << std::endl;
160
161
const CAttributeObject* attr = this->attrib;
162
while(attr != NULL)
163
{
164
attr->Show(os);
165
attr = attr->next;
166
}
167
os << std::endl;
168
}
169
//
170
171
// Creates a document with no tree loaded.
CDocumentObject::CDocumentObject() : _root(NULL)
{
}
175
176
// Releases the element tree, if one was loaded.
CDocumentObject::~CDocumentObject()
{
    if(_root == NULL)
        return;

    FreeElement(_root);
}
181
182
int CDocumentObject::Load(const std::wstring &str)
183

{
184
std::wstring html;
185
if(PreProcess(str, html) != 0)
186
return -1;
187
TNodeQueue que;
188
if(PreParser(html, que) != 0)
189
return -1;
190
if(Parser(html, que) != 0)
191
return -1;
192
return 0;
193
}
194
195
int CDocumentObject::PreProcess(const std::wstring& str, std::wstring& html)
196

{
197
bool tag = false;
198
for(std::wstring::const_iterator it = str.begin(); it != str.end(); ++ it)
199
{
200
if(*it == TAG_LT)
201
{
202
if(tag == true)
203
return -1;
204
tag = true;
205
}
206
else if(*it == TAG_GT)
207
{
208
if(tag == false)
209
return -1;
210
tag = false;
211
}
212
else
213
{
214
if(tag == false)
215
{
216
if(isspace((unsigned char)*it) != 0)
217
continue;
218
}
219
}
220
html += *it;
221
}
222
223
return 0;
224
}
225
226
int CDocumentObject::PreParser(const std::wstring& html, CDocumentObject::TNodeQueue& que)
227

{
228
std::wstring::size_type pos = 0;
229
230
if(html.size() == 0)
231
return -1;
232
if(html[pos] != TAG_LT)
233
return -1;
234
235
TDataStack datastack;
236
237
CParserData data;
238
239
while(pos < html.size())
240
{
241
if(html[pos] == TAG_LT)
242
{
243
if(pos > data.start)
244
{
245
data.type = CParserData::DT_VALUE;
246
data.end = pos;
247
248
// std::cout << "VALUE - " << html.substr(data.start, data.end - data.start) << std::endl;
249
if(PushValueData(data, datastack) != 0)
250
return -1;
251
}
252
253
if(PreParserLT(html, pos, data) != 0)
254
return -1;
255
// std::cout << "TAG - " << html.substr(data.start, data.end - data.start) << std::endl;
256
if(PushTagData(html, data, datastack, que) != 0)
257
return -1;
258
259
++ pos;
260
data.start = pos;
261
}
262
//else if(html[pos] == TAG_GT || html[pos] == TAG_SLASH)
263
//{
264
// return -1;
265
//}
266
else
267
{
268
++ pos;
269
}
270
// std::cout << (char)html[pos] << std::endl;
271
}
272
273
return 0;
274
}
275
276
int CDocumentObject::Parser(const std::wstring& html, CDocumentObject::TNodeQueue& que)
277

{
278
CElementObject *pe = NULL, *pp = NULL, *ps = NULL;
279
size_t level = 0;
280
while(que.size()> 0)
281
{
282
const TNodeData &node = que.front();
283
if(level < que.front().level)
284
{
285
pp = pe;
286
ps = NULL;
287
}
288
else if(level == que.front().level)
289
{
290
ps = pe;
291
}
292
else//>
293
{
294
ps = pe;
295
pp = pe->parent;
296
int t = level - que.front().level;
297
while(t > 0)
298
{
299
ps = ps->parent;
300
pp = pp->parent;
301
-- t;
302
}
303
}
304
level = que.front().level;
305
306
pe = MakeElement(html, que.front(), pp, ps);
307
308
if(pe == NULL)
309
return -1;
310
311
que.pop_front();
312
}
313
314
if(pp != NULL)
315
{
316
while(pp->parent != NULL)
317
pp = pp->parent;
318
_root = pp;
319
}
320
else
321
_root = pe;
322
323
return 0;
324
}
325
326
int CDocumentObject::PreParserLT(const std::wstring& html, std::wstring::size_type& pos, CParserData& data)
327

{
328
if(pos == html.size() - 1)
329
return -1;
330
331
data.start = pos;
332
333
++ pos;
334
335
if(html[pos] != TAG_SLASH)
336
{
337
data.type = CParserData::DT_TAG;
338
}
339
else
340
{
341
data.type = CParserData::DT_END;
342
++ pos;
343
}
344
345
while(pos < html.size())
346
{
347
if(html[pos] == TAG_GT)
348
{
349
if(html[pos - 1] == TAG_SLASH)
350
{
351
data.type = CParserData::DT_DONE;
352
}
353
354
data.end = pos;
355
356
return 0;
357
}
358
else if(html[pos] == TAG_LT)
359
{
360
return -1;
361
}
362
363
++ pos;
364
}
365
366
return -1;
367
}
368
369
// Pushes a value onto the stack. A value is only accepted when something is
// already on the stack.
// Returns 0 on success, -1 when the stack is empty.
int CDocumentObject::PushValueData(const TinyHtmlParser::CParserData &data, CDocumentObject::TDataStack &datastack) const
{
    if(datastack.empty())
        return -1;

    datastack.push(data);
    return 0;
}
376
377
int CDocumentObject::PushTagData(const std::wstring& html, const CParserData& data, CDocumentObject::TDataStack& datastack, CDocumentObject::TNodeQueue& nodeque) const
378

{
379
if(data.type == CParserData::DT_TAG)
380
{
381
if(CheckSpecialTag(html, data) == 0)
382
{
383
TNodeData node;
384
node.tag = data;
385
386
node.level = datastack.size();
387
nodeque.push_front(node);
388
return 0;
389
}
390
391
if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
392
{
393
CParserData data = datastack.top();
394
datastack.pop();
395
if(datastack.top().type != CParserData::DT_TAG)
396
return -1;
397
datastack.top().type = CParserData::DT_TAG_VALUE;
398
datastack.top().vstart = data.start;
399
datastack.top().vend = data.end;
400
}
401
402
datastack.push(data);
403
}
404
else if(data.type == CParserData::DT_END)
405
{
406
if(datastack.size() == 0)
407
return -1;
408
409
TNodeData node;
410
if(datastack.top().type == CParserData::DT_TAG || datastack.top().type == CParserData::DT_TAG_VALUE)
411
{
412
node.tag = datastack.top();
413
datastack.pop();
414
}
415
else if(datastack.top().type == CParserData::DT_VALUE)
416
{
417
node.value = datastack.top();
418
419
// std::cout << "value - " << html.substr(node.value.start, node.value.end - node.value.start) << std::endl;
420
421
datastack.pop();
422
423
if(datastack.size() == 0)
424
return -1;
425
426
if(datastack.top().type == CParserData::DT_TAG)
427
{
428
node.tag = datastack.top();
429
}
430
else if(datastack.top().type == CParserData::DT_TAG_VALUE)
431
{
432
node.tag = datastack.top();
433
}
434
else
435
{
436
return -1;
437
}
438
439
//node.tag = datastack.top();
440
//else if(datastack.top().type == CParserData::DT_TAG_VALUE)
441
//{
442
// node.tag = datastack.top();
443
//}
444
datastack.pop();
445
}
446
else
447
{
448
// std::cout << "type : " << datastack.top().type << std::endl;
449
return -1;
450
}
451
452
if(CheckTag(html, node.tag, data) != 0)
453
return -1;
454
455
node.level = datastack.size();
456
nodeque.push_front(node);
457
}
458
else if(data.type == CParserData::DT_DONE)
459
{
460
if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
461
{
462
CParserData data = datastack.top();
463
datastack.pop();
464
if(datastack.top().type != CParserData::DT_TAG)
465
return -1;
466
datastack.top().type = CParserData::DT_TAG_VALUE;
467
datastack.top().vstart = data.start;
468
datastack.top().vend = data.end;
469
}
470
471
// datastack.push(data);
472
473
TNodeData node;
474
node.tag = data;
475
476
node.level = datastack.size();
477
nodeque.push_front(node);
478
}
479
else
480
{
481
return -1;
482
}
483
return 0;
484
}
485
486
int CDocumentObject::CheckSpecialTag(const std::wstring& html, const CParserData& data) const
487

{
488
std::wstring tag = html.substr(data.start + 1, data.end - data.start - 1);
489
std::wstring::size_type pos = tag.find(L" ");
490
if(pos != std::wstring::npos)
491
tag = tag.substr(0, pos);
492
493
if(tag == L"IMG")
494
return 0;
495
496
return -1;
497
}
498
499
int CDocumentObject::CheckTag(const std::wstring& html, const CParserData& tag, const CParserData& end) const
500

{
501
std::wstring str = html.substr(tag.start + 1, tag.end - tag.start - 1);
502
std::wstring::size_type pos = str.find(L" ");
503
if(pos != std::wstring::npos)
504
str = str.substr(0, pos);
505
506
if(str != html.substr(end.start + 2, end.end - end.start - 2))
507
{
508
// std::cout << "tag : " << str << " -- end : " << html.substr(end.start + 2, end.end - end.start - 2) << std::endl;
509
return -1;
510
}
511
return 0;
512
}
513
514
// Creates the element described by node and links it into the tree.
//
// html    : the pre-processed text the offsets in node refer to.
// node    : tag, optional value and level of the element.
// parent  : parent element or NULL; its child pointer is set to the new
//           element.
// sibling : stored as the sibling of the new element (may be NULL).
// Returns the new element, or NULL when the tag type is not supported or the
// element cannot be analysed.
CElementObject* CDocumentObject::MakeElement(const std::wstring& html, const CDocumentObject::TNodeData &node, CElementObject *parent, CElementObject *sibling) const
{
    std::auto_ptr<CElementObject> ele(new CElementObject);

    ele->level = node.level;

    const CParserData& tag = node.tag;
    if(tag.type == CParserData::DT_TAG || tag.type == CParserData::DT_TAG_VALUE)
    {
        // For DT_TAG_VALUE the previous code assigned ET_NODE to ele->tag
        // instead of ele->type, so the type of such an element stayed
        // ET_UNKNOWN unless a value followed.
        ele->type = ET_NODE;
        ele->tag = html.substr(tag.start + 1, tag.end - tag.start - 1);
    }
    else if(tag.type == CParserData::DT_DONE)
    {
        ele->type = ET_TAG;
        // One character less: the '/' in front of the '>' is not part of the tag.
        ele->tag = html.substr(tag.start + 1, tag.end - tag.start - 2);
    }
    else
    {
        return NULL;
    }

    if(node.value.type == CParserData::DT_VALUE)
    {
        ele->type = ET_ELEMENT;
        if(tag.type == CParserData::DT_TAG)
            ele->value = html.substr(node.value.start, node.value.end - node.value.start);
        else
            // Text attached to the tag, then "%", then the text before the closing tag.
            ele->value = html.substr(tag.vstart, tag.vend - tag.vstart) + L"%" + html.substr(node.value.start, node.value.end - node.value.start);
    }

    if(ele->Analyse() != 0)
        return NULL;

    if(parent != NULL)
        parent->child = ele.get();
    ele->parent = parent;
    ele->sibling = sibling;

    return ele.release();
}
561
562
void CDocumentObject::Show(std::wostream &os) const
563

{
564
if(_root != NULL)
565
ShowElement(os, _root);
566
}
567
568
void CDocumentObject::ShowElement(std::wostream& os, const CElementObject* e) const
569

{
570
const CElementObject* pe = e, *ps = e->sibling;
571
572
pe->Show(os);
573
574
pe = pe->child;
575
if(pe != NULL)
576
{
577
ShowElement(os, pe);
578
}
579
if(ps != NULL)
580
{
581
ShowElement(os, ps);
582
}
583
}
584
585
// Releases root, everything below it and everything reachable through its
// sibling chain. A NULL root is accepted and ignored.
//
// Elements are created with new, so they are released with delete; this also
// runs the element destructor, which frees the attribute list. The previous
// code called free(), which is not a valid way to release an object created
// with new and never ran the destructor. It also read root->child before
// checking root for NULL.
void CDocumentObject::FreeElement(CElementObject* root)
{
    while(root != NULL)
    {
        // Remember the sibling before the element is destroyed.
        CElementObject* const next = root->sibling;

        if(root->child != NULL)
            FreeElement(root->child);

        delete root;
        root = next;
    }
}
606
607
// Starts a document-wide search for elements named tag and returns the first
// match, or NULL when there is none or no tree is loaded. The tag and the
// search position are remembered for FindNextElement().
const CElementObject* CDocumentObject::FindFirstElement(const std::wstring &tag)
{
    if(_root == NULL)
        return NULL;

    _findtag = tag;
    _findstack = TElementStack();   // forget any earlier search

    return FindElement(NULL, _root, _findtag, _findstack);
}
618
619
// Continues the document-wide search below the element on top of the search
// stack. Returns the next match, or NULL when the stack is empty or nothing
// more is found.
const CElementObject* CDocumentObject::FindNextElement()
{
    if(_findstack.empty())
        return NULL;

    const CElementObject* resume = _findstack.top()->child;
    return FindElement(NULL, resume, _findtag, _findstack);
}
626
627
// Starts a search for elements named tag at element. The search state is
// kept in tmpstack, which is emptied first. element itself is returned when
// its tag matches.
// Returns the first match, or NULL when element is NULL or nothing matches.
const CElementObject* CDocumentObject::FindFirstElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack)
{
    if(element == NULL)
        return NULL;

    tmpstack = TElementStack();

    return FindElement(element, element, tag, tmpstack);
}
637
638
// Continues a search started with the three-argument FindFirstElement(),
// resuming below the element on top of tmpstack.
// Returns the next match, or NULL when tmpstack is empty or nothing more is
// found.
const CElementObject* CDocumentObject::FindNextElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack)
{
    if(tmpstack.empty())
        return NULL;

    const CElementObject* resume = tmpstack.top()->child;
    return FindElement(element, resume, tag, tmpstack);
}
645
646
// Core of the searches: looks for the next element named tag, starting at pe.
//
// root  : the element the search must not leave through the stack; NULL for
//         a document-wide search.
// pe    : the element to start from (may be NULL).
// stack : the elements on the path being explored; the match is on top when
//         the function returns a non-NULL result.
// Returns the matching element, or NULL when the search is exhausted.
const CElementObject* CDocumentObject::FindElement(const CElementObject* root, const CElementObject* pe, const std::wstring& tag, TElementStack& stack)
{
    for(;;)
    {
        // Go down the chain of children, recording the path.
        for(; pe != NULL; pe = pe->child)
        {
            stack.push(pe);
            if(pe->tag == tag)
                return pe;
        }

        // Dead end: pop elements until one of them has a sibling, stopping
        // when the stack is empty or root is on top.
        while(pe == NULL && !stack.empty() && stack.top() != root)
        {
            pe = stack.top()->sibling;
            stack.pop();
        }

        if(pe == NULL)
            return NULL;
    }
}
667
668
// Returns the first attribute of element whose name equals attr, or NULL
// when element is NULL or has no such attribute.
const CAttributeObject* CDocumentObject::FindAttribute(const TinyHtmlParser::CElementObject *element, const std::wstring &attr)
{
    if(element == NULL)
        return NULL;

    // The element performs the same walk over its attribute list.
    return element->FindAttribute(attr);
}
682
683
}