Lingoes的在线词典结果显示利用了浏览器的一个特性--持续显示,就是说对于浏览器来说,有多少HTML就显示多少,边显示边下载后续HTML。从使用的感觉上看,Lingoes显示结果时,会先看到本地词典结果,然后窗口变化显示在线词典结果;而从实现上看,就是先输出本地文件的HTML到浏览器,等在线词典的结果到达后再继续输出在线词典的HTML,于是就可以看到很多词典内容了。
这个方法很好,如果网速好,使用起来根本感觉不到是在使用网络数据。但此方法对于LingosHook来说有时就是灾难。举例说,当Hook发现有结果显示时,马上抓取输送到浏览器的HTML数据,并解析结果,如果网络不够好,那么只有本地数据,如果网络足够好,那么就有全数据,但在网络不好不坏的情况下,Hook就会抓到“破碎”数据,导致解析失败。这也是有时Hook无法抓取结果的原因--解析HTML数据出错。
为解决此问题有两种方案,一是提高HTML解析的容错能力,二是等待HTML“全”数据到达后再解析。对于第二种方法来说,当前还不太可行,一来当前还没有截取到“全”数据到达的事件,二来如果网络不好,等待时间过长,反而会增加“丢失数据”的几率。因此,目前的法子就是提高Hook对HTML的容错能力。
从分析结果看,“破碎”数据不等于“混乱”数据,就是说HTML只是缺失,且只是缺失连续数据的后半段,而不是Tag混乱。例如下面数据就是所谓的“破碎”数据例子:
<HTML><BODY>this is context.</BODY>

<HTML><BODY>this is context.</B

<HTML><BODY>this is
因此针对此情况,修改了TinyHtmlParser,增加了一个PreParserBroken()函数,以实现对以上情况的支持。代码如下。
1
#ifndef __TINYHTMLPARSER_H__
2
#define __TINYHTMLPARSER_H__
3
4
#include <cstddef>
#include <cwctype>
#include <deque>
#include <iostream>
#include <memory>
#include <queue>
#include <stack>
#include <string>
8
9
namespace TinyHtmlParser
10

{
11
12
// Classification stored in CElementObject::type.
enum ElementType
{
    ET_UNKNOWN = -1, // not classified yet
    ET_TAG     = 0,  // just a tag
    ET_NODE    = 1,  // no value
    ET_ELEMENT = 2   // have value
};
13
14
// One attribute of an element, stored as a name/value pair.
//
// The attributes of an element are chained into a singly linked list through
// `next`. This class does not delete `next`; the list is released node by
// node in CElementObject::FreeAnalyseAttribute().
class CAttributeObject
{
public:
    // a: attribute name, v: attribute value. The new node is not linked
    // to any successor yet.
    CAttributeObject(const std::wstring& a, const std::wstring& v)
        : attr(a), value(v), next(NULL)
    {
    }

    virtual ~CAttributeObject()
    {
    }

    // Writes a one-line description of this attribute to os.
    void Show(std::wostream& os) const;

public:
    std::wstring attr;      // attribute name
    std::wstring value;     // attribute value
    CAttributeObject* next; // next attribute of the same element, or NULL
};
29
30
class CElementObject
31

{
32
public:
33
CElementObject();
34
virtual ~CElementObject();
35
36
virtual int Analyse();
37
38
const CAttributeObject* FindAttribute(const std::wstring& attr) const;
39
40
void Show(std::wostream& os) const;
41
protected:
42
int AnalyseAttribute(const std::wstring& attr);
43
int MakeAttribute(const std::wstring& attr);
44
int MakeAttribute(const std::wstring& attr, const std::wstring& value);
45
void FreeAnalyseAttribute();
46
int AnalyseValue();
47
public:
48
ElementType type;
49
size_t level;
50
CElementObject* parent;
51
CElementObject* child;
52
CElementObject* sibling;
53
54
CAttributeObject* attrib;
55
public:
56
std::wstring tag;
57
std::wstring value;
58
};
59
60
// Position record for one lexical item found in the HTML text.
//
// `start`/`end` are offsets of the item inside the HTML string. For a tag
// that has absorbed text (DT_TAG_VALUE), `vstart`/`vend` delimit that text;
// vstart == 0 is used by the parser as "not set yet".
class CParserData
{
public:
    enum DataType
    {
        DT_UNKNOWN = -1,
        DT_TAG = 0,
        DT_VALUE,
        DT_END,
        DT_DONE,
        DT_TAG_VALUE,
        DT_BROKEN
    };

public:
    // Every offset starts at zero. `end` and `vend` used to be left
    // uninitialised although objects of this class are copied around
    // (stack/deque elements) before those fields are assigned.
    CParserData()
        : type(DT_UNKNOWN)
        , start(0)
        , end(0)
        , vstart(0)
        , vend(0)
    {
    }

    virtual ~CParserData()
    {
    }

public:
    DataType type;
    size_t start;
    size_t end;
    size_t vstart;
    size_t vend;
};
80
81
class CDocumentObject
82

{
83
protected:
84
static const wchar_t TAG_LT = L'<';
85
static const wchar_t TAG_GT = L'>';
86
static const wchar_t TAG_SLASH = L'/';
87
static const wchar_t TAG_BSLASH = L'\\';
88
static const wchar_t TAG_AND = L'&';
89
90
typedef std::stack<CParserData> TDataStack;
91
struct TNodeData
92
{
93
size_t level;
94
CParserData tag;
95
CParserData value;
96
// CParserData end;
97
};
98
typedef std::deque<TNodeData> TNodeQueue;
99
public:
100
typedef std::stack<const CElementObject*> TElementStack;
101
public:
102
CDocumentObject();
103
virtual ~CDocumentObject();
104
105
int Load(const std::wstring& str, bool strict = true);
106
107
const CElementObject* Root() const
{ return _root; }
108
109
const CElementObject* FindFirstElement(const std::wstring& tag);
110
const CElementObject* FindNextElement();
111
112
const CElementObject* FindFirstElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack);
113
const CElementObject* FindNextElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack);
114
115
const CAttributeObject* FindAttribute(const CElementObject* element, const std::wstring& attr);
116
117
bool IsMistake() const
{ return _bIsMistake; }
118
119
void Show(std::wostream& os) const;
120
protected:
121
int PreProcess(const std::wstring& str, std::wstring& html, bool strict);
122
int PreParser(const std::wstring& html, TNodeQueue& que, bool strict);
123
int Parser(const std::wstring& html, TNodeQueue& que, bool strict);
124
private:
125
int PreParserLT(const std::wstring& html, std::wstring::size_type& pos, CParserData& data);
126
int PushValueData(const CParserData& data, TDataStack& datastack) const;
127
int PushTagData(const std::wstring& html, const CParserData& data, TDataStack& datastack, TNodeQueue& nodeque) const;
128
int PreParserBroken(const std::wstring& html, TDataStack& datastack, TNodeQueue& nodeque) const;
129
130
int CheckSpecialTag(const std::wstring& html, const CParserData& data) const;
131
int CheckTag(const std::wstring& html, const CParserData& tag, const CParserData& end) const;
132
CElementObject* MakeElement(const std::wstring& html, const TNodeData& node, CElementObject* parent, CElementObject* sibling) const;
133
134
void CDocumentObject::ShowElement(std::wostream& os, const CElementObject* e) const;
135
136
void FreeElement(CElementObject* root);
137
138
const CElementObject* FindElement(const CElementObject* root, const CElementObject* pe, const std::wstring& tag, TElementStack& stack);
139
private:
140
CElementObject* _root;
141
private:
142
std::wstring _findtag;
143
TElementStack _findstack;
144
private:
145
bool _bIsMistake;
146
};
147
148
}
149
150
#endif
151
1
2
#include "TinyHtmlParser.h"
3
4
namespace TinyHtmlParser
5

{
6
7
void CAttributeObject::Show(std::wostream& os) const
8

{
9
os << " attr : " << this->attr << " -- value = " << this->value << std::endl;
10
}
11
12
// Creates an unclassified element that is not linked to any other node and
// has no attributes.
CElementObject::CElementObject()
    : type(ET_UNKNOWN), level(0),
      parent(NULL), child(NULL), sibling(NULL),
      attrib(NULL)
{
}
21
22
// Releases the attribute list; child and sibling elements are left alone.
CElementObject::~CElementObject()
{
    this->FreeAnalyseAttribute();
}
26
27
int CElementObject::Analyse()
28

{
29
std::wstring str = tag;
30
31
std::wstring::size_type pos = str.find(L" ");
32
if(pos != std::wstring::npos)
33
{
34
tag = str.substr(0, pos);
35
36
str = str.substr(pos + 1);
37
if(AnalyseAttribute(str) != 0)
38
{
39
return -1;
40
}
41
}
42
if(type == ET_ELEMENT)
43
{
44
if(AnalyseValue() != 0)
45
return -1;
46
}
47
return 0;
48
}
49
50
int CElementObject::AnalyseAttribute(const std::wstring& attr)
51

{
52
if(attr.size() == 0)
53
return 0;
54
55
std::wstring a, v;
56
std::wstring::size_type pos = attr.find(L"="), start = 0;
57
while(pos != std::wstring::npos)
58
{
59
a = attr.substr(start, pos - start);
60
if(pos == attr.size() - 1)
61
return -1;
62
start = pos + 1;
63
if(attr[pos + 1] == L'\"')
64
{
65
pos = attr.find(L"\"", start + 1);
66
if(pos == std::wstring::npos)
67
return -1;
68
v = attr.substr(start, pos - start + 1);
69
start = pos + 2;
70
}
71
else
72
{
73
pos = attr.find(L" ", start);
74
if(pos == std::wstring::npos)
75
pos = attr.size();
76
v = attr.substr(start, pos - start);
77
start = pos + 1;
78
}
79
if(MakeAttribute(a, v) != 0)
80
return -1;
81
82
if(start >= attr.size())
83
break;
84
85
pos = attr.find(L"=", start);
86
}
87
return 0;
88
}
89
90
int CElementObject::MakeAttribute(const std::wstring &attr)
91

{
92
std::wstring::size_type pos = attr.find(L"=");
93
if(pos == std::wstring::npos)
94
return -1;
95
96
return MakeAttribute(attr.substr(0, pos), attr.substr(pos));
97
}
98
99
int CElementObject::MakeAttribute(const std::wstring &attr, const std::wstring& value)
100

{
101
std::auto_ptr<CAttributeObject> obj(new CAttributeObject(attr, value));//attr.substr(0, pos), attr.substr(pos)));
102
103
if(attrib != NULL)
104
{
105
CAttributeObject* tmp = attrib;
106
while(tmp->next != NULL)
107
tmp = tmp->next;
108
tmp->next = obj.release();
109
}
110
else
111
{
112
attrib = obj.release();
113
}
114
return 0;
115
}
116
117
118
void CElementObject::FreeAnalyseAttribute()
119

{
120
CAttributeObject* tmp = attrib;
121
while(attrib != NULL)
122
{
123
tmp = attrib->next;
124
delete attrib;
125
attrib = tmp;
126
}
127
128
}
129
130
int CElementObject::AnalyseValue()
131

{
132
std::wstring::size_type pos = this->value.find(L" ");
133
while(pos != std::wstring::npos)
134
{
135
this->value.replace(pos, 6, L" ");
136
pos = this->value.find(L" ", pos + 1);
137
}
138
139
return 0;
140
}
141
142
// Returns the first attribute whose name equals attr, or NULL when the
// element has no such attribute.
const CAttributeObject* CElementObject::FindAttribute(const std::wstring& attr) const
{
    for(const CAttributeObject* node = attrib; node != NULL; node = node->next)
    {
        if(node->attr == attr)
            return node;
    }
    return NULL;
}
153
154
void CElementObject::Show(std::wostream& os) const
155

{
156
os << "[" << this->level << "]" << "Tag : " << this->tag;
157
if(this->type == ET_ELEMENT)
158
os << " -- value = " << /**//*std::wstring*/(this->value);
159
os << std::endl;
160
161
const CAttributeObject* attr = this->attrib;
162
while(attr != NULL)
163
{
164
attr->Show(os);
165
attr = attr->next;
166
}
167
os << std::endl;
168
}
169
//
170
171
// Creates an empty document: no tree loaded, mistake flag cleared.
CDocumentObject::CDocumentObject()
    : _root(NULL), _bIsMistake(false)
{
}
176
177
// Releases the parsed tree, if one was loaded.
CDocumentObject::~CDocumentObject()
{
    if(_root == NULL)
        return;

    FreeElement(_root);
}
182
183
int CDocumentObject::Load(const std::wstring &str, bool strict)
184

{
185
std::wstring html;
186
if(PreProcess(str, html, strict) != 0)
187
return -1;
188
TNodeQueue que;
189
if(PreParser(html, que, strict) != 0)
190
return -1;
191
if(Parser(html, que, strict) != 0)
192
return -1;
193
return 0;
194
}
195
196
int CDocumentObject::PreProcess(const std::wstring& str, std::wstring& html, bool strict)
197

{
198
//html = str;
199
bool tag = false;
200
for(std::wstring::const_iterator it = str.begin(); it != str.end(); ++ it)
201
{
202
if(*it == TAG_LT)
203
{
204
if(tag == true)
205
return -1;
206
tag = true;
207
}
208
else if(*it == TAG_GT)
209
{
210
if(tag == false)
211
return -1;
212
tag = false;
213
}
214
else
215
{
216
if(tag == false)
217
{
218
if(isspace((unsigned char)*it) != 0)
219
continue;
220
}
221
}
222
html += *it;
223
}
224
225
return 0;
226
}
227
228
int CDocumentObject::PreParser(const std::wstring& html, CDocumentObject::TNodeQueue& que, bool strict)
229

{
230
std::wstring::size_type pos = 0;
231
232
if(html.size() == 0)
233
return -1;
234
if(html[pos] != TAG_LT)
235
return -1;
236
237
TDataStack datastack;
238
239
CParserData data;
240
241
while(pos < html.size())
242
{
243
if(html[pos] == TAG_LT)
244
{
245
if(pos > data.start)
246
{
247
data.type = CParserData::DT_VALUE;
248
data.end = pos;
249
250
if(PushValueData(data, datastack) != 0)
251
return -1;
252
}
253
254
if(PreParserLT(html, pos, data) != 0)
255
break;
256
if(PushTagData(html, data, datastack, que) != 0)
257
return -1;
258
259
++ pos;
260
data.start = pos;
261
}
262
//else if(html[pos] == TAG_GT || html[pos] == TAG_SLASH)
263
//{
264
// return -1;
265
//}
266
else
267
{
268
++ pos;
269
}
270
}
271
272
if(datastack.size() > 0)
273
{
274
if(strict)
275
return -1;
276
277
if(pos > data.start)
278
{
279
data.type = CParserData::DT_BROKEN;
280
data.end = pos;
281
282
if(PushValueData(data, datastack) != 0)
283
return -1;
284
}
285
286
if(PreParserBroken(html, datastack, que) != 0)
287
return -1;
288
}
289
290
return 0;
291
}
292
293
int CDocumentObject::Parser(const std::wstring& html, CDocumentObject::TNodeQueue& que, bool strict)
294

{
295
CElementObject *pe = NULL, *pp = NULL, *ps = NULL;
296
size_t level = 0;
297
while(que.size()> 0)
298
{
299
const TNodeData &node = que.front();
300
if(level < que.front().level)
301
{
302
pp = pe;
303
ps = NULL;
304
}
305
else if(level == que.front().level)
306
{
307
ps = pe;
308
}
309
else//>
310
{
311
ps = pe;
312
pp = pe->parent;
313
int t = level - que.front().level;
314
while(t > 0)
315
{
316
ps = ps->parent;
317
pp = pp->parent;
318
-- t;
319
}
320
}
321
level = que.front().level;
322
323
pe = MakeElement(html, que.front(), pp, ps);
324
325
if(pe == NULL)
326
return -1;
327
328
que.pop_front();
329
}
330
331
if(pp != NULL)
332
{
333
while(pp->parent != NULL)
334
pp = pp->parent;
335
_root = pp;
336
}
337
else
338
_root = pe;
339
340
return 0;
341
}
342
343
int CDocumentObject::PreParserLT(const std::wstring& html, std::wstring::size_type& pos, CParserData& data)
344

{
345
if(pos == html.size() - 1)
346
return -1;
347
348
data.start = pos;
349
350
++ pos;
351
352
if(html[pos] != TAG_SLASH)
353
{
354
data.type = CParserData::DT_TAG;
355
}
356
else
357
{
358
data.type = CParserData::DT_END;
359
++ pos;
360
}
361
362
while(pos < html.size())
363
{
364
if(html[pos] == TAG_GT)
365
{
366
if(html[pos - 1] == TAG_SLASH)
367
{
368
data.type = CParserData::DT_DONE;
369
}
370
371
data.end = pos;
372
373
return 0;
374
}
375
else if(html[pos] == TAG_LT)
376
{
377
return -1;
378
}
379
380
++ pos;
381
}
382
383
return -1;
384
}
385
386
// Pushes a text run onto the stack. Text is only accepted while at least one
// entry is already on the stack; otherwise -1 is returned. Returns 0 on
// success.
int CDocumentObject::PushValueData(const TinyHtmlParser::CParserData &data, CDocumentObject::TDataStack &datastack) const
{
    if(datastack.empty())
        return -1;

    datastack.push(data);
    return 0;
}
393
394
int CDocumentObject::PushTagData(const std::wstring& html, const CParserData& data, CDocumentObject::TDataStack& datastack, CDocumentObject::TNodeQueue& nodeque) const
395

{
396
if(data.type == CParserData::DT_TAG)
397
{
398
if(CheckSpecialTag(html, data) == 0)
399
{
400
TNodeData node;
401
node.tag = data;
402
403
node.level = datastack.size();
404
nodeque.push_front(node);
405
return 0;
406
}
407
408
if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
409
{
410
CParserData data = datastack.top();
411
datastack.pop();
412
if(datastack.top().type != CParserData::DT_TAG && datastack.top().type != CParserData::DT_TAG_VALUE)//for special condition <t1>v1<t2>v2</t2>v11<t3>v3</t3>v111</t1>
413
return -1;
414
datastack.top().type = CParserData::DT_TAG_VALUE;
415
if(datastack.top().vstart == 0)
416
datastack.top().vstart = data.start;
417
datastack.top().vend = data.end;
418
}
419
420
datastack.push(data);
421
}
422
else if(data.type == CParserData::DT_END)
423
{
424
if(datastack.size() == 0)
425
return -1;
426
427
TNodeData node;
428
if(datastack.top().type == CParserData::DT_TAG || datastack.top().type == CParserData::DT_TAG_VALUE)
429
{
430
node.tag = datastack.top();
431
datastack.pop();
432
}
433
else if(datastack.top().type == CParserData::DT_VALUE)
434
{
435
node.value = datastack.top();
436
437
datastack.pop();
438
439
if(datastack.size() == 0)
440
return -1;
441
442
if(datastack.top().type == CParserData::DT_TAG)
443
{
444
node.tag = datastack.top();
445
}
446
else if(datastack.top().type == CParserData::DT_TAG_VALUE)
447
{
448
node.tag = datastack.top();
449
}
450
else
451
{
452
return -1;
453
}
454
455
//node.tag = datastack.top();
456
//else if(datastack.top().type == CParserData::DT_TAG_VALUE)
457
//{
458
// node.tag = datastack.top();
459
//}
460
datastack.pop();
461
}
462
else
463
{
464
return -1;
465
}
466
467
if(CheckTag(html, node.tag, data) != 0)
468
return -1;
469
470
node.level = datastack.size();
471
nodeque.push_front(node);
472
}
473
else if(data.type == CParserData::DT_DONE)
474
{
475
if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
476
{
477
CParserData data = datastack.top();
478
datastack.pop();
479
if(datastack.top().type != CParserData::DT_TAG)
480
return -1;
481
datastack.top().type = CParserData::DT_TAG_VALUE;
482
datastack.top().vstart = data.start;
483
datastack.top().vend = data.end;
484
}
485
486
// datastack.push(data);
487
488
TNodeData node;
489
node.tag = data;
490
491
node.level = datastack.size();
492
nodeque.push_front(node);
493
}
494
else
495
{
496
return -1;
497
}
498
return 0;
499
}
500
501
int CDocumentObject::PreParserBroken(const std::wstring& html, TDataStack& datastack, TNodeQueue& nodeque) const
502

{
503
while(datastack.size() > 0)
504
{
505
CParserData& data = datastack.top();
506
if(data.type == CParserData::DT_TAG || data.type == CParserData::DT_TAG_VALUE)
507
{
508
TNodeData node;
509
node.tag = data;
510
511
datastack.pop();
512
513
node.level = datastack.size();
514
515
nodeque.push_front(node);
516
}
517
else if(data.type == CParserData::DT_VALUE)
518
{
519
TNodeData node;
520
node.value = data;
521
522
datastack.pop();
523
data = datastack.top();
524
525
if(data.type == CParserData::DT_TAG || data.type == CParserData::DT_TAG_VALUE)
526
{
527
node.tag = data;
528
}
529
else
530
{
531
return -1;
532
}
533
datastack.pop();
534
535
node.level = datastack.size();
536
537
nodeque.push_front(node);
538
}
539
else if(data.type == CParserData::DT_BROKEN)
540
{
541
TNodeData node;
542
node.value = data;
543
544
datastack.pop();
545
data = datastack.top();
546
547
if(data.type == CParserData::DT_TAG || data.type == CParserData::DT_TAG_VALUE)
548
{
549
node.tag = data;
550
}
551
else if(data.type == CParserData::DT_VALUE)
552
{
553
continue;
554
}
555
else
556
{
557
return -1;
558
}
559
datastack.pop();
560
561
node.level = datastack.size();
562
563
nodeque.push_front(node);
564
}
565
}
566
567
return 0;
568
}
569
570
int CDocumentObject::CheckSpecialTag(const std::wstring& html, const CParserData& data) const
571

{
572
std::wstring tag = html.substr(data.start + 1, data.end - data.start - 1);
573
std::wstring::size_type pos = tag.find(L" ");
574
if(pos != std::wstring::npos)
575
tag = tag.substr(0, pos);
576
577
if(tag == L"IMG")
578
return 0;
579
580
return -1;
581
}
582
583
int CDocumentObject::CheckTag(const std::wstring& html, const CParserData& tag, const CParserData& end) const
584

{
585
std::wstring str = html.substr(tag.start + 1, tag.end - tag.start - 1);
586
std::wstring::size_type pos = str.find(L" ");
587
if(pos != std::wstring::npos)
588
str = str.substr(0, pos);
589
590
if(str != html.substr(end.start + 2, end.end - end.start - 2))
591
{
592
return -1;
593
}
594
return 0;
595
}
596
597
// Builds one tree element from a queued node description and links it to
// its parent and sibling.
//
// html    : the cleaned HTML the node offsets refer to.
// node    : tag/value positions and nesting level.
// parent  : element that receives the new element as `child`, or NULL.
// sibling : stored in the new element's `sibling`, may be NULL.
// Returns the new element (owned by the tree from then on), or NULL when
// the tag type is not DT_TAG/DT_DONE/DT_TAG_VALUE or Analyse() fails; in
// that case nothing is linked and the element is destroyed.
CElementObject* CDocumentObject::MakeElement(const std::wstring& html, const CDocumentObject::TNodeData &node, CElementObject *parent, CElementObject *sibling) const
{
    std::auto_ptr<CElementObject> ele(new CElementObject);

    ele->level = node.level;

    switch(node.tag.type)
    {
    case CParserData::DT_TAG:
    case CParserData::DT_TAG_VALUE:
        // For DT_TAG_VALUE the enum used to be assigned to `tag` instead of
        // `type`, which left the element type at ET_UNKNOWN.
        ele->type = ET_NODE;
        ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 1);
        break;

    case CParserData::DT_DONE:
        ele->type = ET_TAG;
        // One character less: the '/' before '>' is not part of the tag.
        ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 2);
        break;

    default:
        return NULL;
    }

    if(node.value.type == CParserData::DT_VALUE)
    {
        ele->type = ET_ELEMENT;

        const std::wstring trailing = html.substr(node.value.start, node.value.end - node.value.start);
        if(node.tag.type == CParserData::DT_TAG)
        {
            ele->value = trailing;
        }
        else
        {
            // Text folded into the tag earlier, a '%' separator, then the
            // text that directly precedes the end tag.
            ele->value = html.substr(node.tag.vstart, node.tag.vend - node.tag.vstart) + L"%" + trailing;
        }
    }

    if(ele->Analyse() != 0)
        return NULL;

    if(parent != NULL)
        parent->child = ele.get();
    ele->parent = parent;
    ele->sibling = sibling;

    return ele.release();
}
642
643
void CDocumentObject::Show(std::wostream &os) const
644

{
645
if(_root != NULL)
646
ShowElement(os, _root);
647
}
648
649
void CDocumentObject::ShowElement(std::wostream& os, const CElementObject* e) const
650

{
651
const CElementObject* pe = e, *ps = e->sibling;
652
653
pe->Show(os);
654
655
pe = pe->child;
656
if(pe != NULL)
657
{
658
ShowElement(os, pe);
659
}
660
if(ps != NULL)
661
{
662
ShowElement(os, ps);
663
}
664
}
665
666
// Deletes root together with everything reachable through its child and
// sibling links. Passing NULL is allowed and does nothing.
//
// The links are read before the element is deleted. The sibling chain is
// walked in a loop; only the child link recurses. Previously the child and
// sibling were read from root before root was tested for NULL.
void CDocumentObject::FreeElement(CElementObject* root)
{
    while(root != NULL)
    {
        CElementObject* const below = root->child;
        CElementObject* const next = root->sibling;

        delete root;

        FreeElement(below);
        root = next;
    }
}
687
688
// Starts a document-wide search for tag and returns the first match, or NULL
// when nothing is loaded or nothing matches. The tag and the traversal state
// are remembered for FindNextElement().
const CElementObject* CDocumentObject::FindFirstElement(const std::wstring &tag)
{
    if(_root == NULL)
        return NULL;

    _findstack = TElementStack(); // discard the state of any earlier search
    _findtag = tag;

    return FindElement(NULL, _root, _findtag, _findstack);
}
699
700
// Continues the search started by FindFirstElement(tag), resuming below the
// element on top of the remembered stack. Returns the next match, or NULL
// when the stack is empty or nothing further matches.
const CElementObject* CDocumentObject::FindNextElement()
{
    if(_findstack.empty())
        return NULL;

    const CElementObject* const resume = _findstack.top()->child;
    return FindElement(NULL, resume, _findtag, _findstack);
}
707
708
// Starts a search for tag at element, using the caller's tmpstack as the
// traversal state (it is emptied first). Returns the first match, or NULL
// when element is NULL or nothing matches.
const CElementObject* CDocumentObject::FindFirstElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack)
{
    if(element == NULL)
        return NULL;

    tmpstack = TElementStack();

    return FindElement(element, element, tag, tmpstack);
}
718
719
// Continues a search started with the three-argument FindFirstElement(),
// resuming below the element on top of tmpstack. Returns the next match, or
// NULL when tmpstack is empty or nothing further matches.
const CElementObject* CDocumentObject::FindNextElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack)
{
    if(tmpstack.empty())
        return NULL;

    const CElementObject* const resume = tmpstack.top()->child;
    return FindElement(element, resume, tag, tmpstack);
}
726
727
// Depth-first search for an element whose tag equals `tag`.
//
// Starting at pe, the child links are followed and every visited element is
// pushed on `stack`. At a dead end, entries are popped until one of them has
// a sibling to continue with; popping stops when the stack is empty or
// `root` is on top. Returns the first match (left on top of the stack), or
// NULL when the search is exhausted.
const CElementObject* CDocumentObject::FindElement(const CElementObject* root, const CElementObject* pe, const std::wstring& tag, TElementStack& stack)
{
    for(;;)
    {
        // Descend along the child links.
        for(; pe != NULL; pe = pe->child)
        {
            stack.push(pe);
            if(pe->tag == tag)
                return pe;
        }

        // Back up to the nearest entry that offers a sibling.
        while(pe == NULL && !stack.empty() && stack.top() != root)
        {
            pe = stack.top()->sibling;
            stack.pop();
        }

        if(pe == NULL)
            return NULL;
    }
}
748
749
// Returns the first attribute of element whose name equals attr, or NULL
// when element is NULL or has no such attribute.
const CAttributeObject* CDocumentObject::FindAttribute(const TinyHtmlParser::CElementObject *element, const std::wstring &attr)
{
    if(element == NULL)
        return NULL;

    for(const CAttributeObject* node = element->attrib; node != NULL; node = node->next)
    {
        if(node->attr == attr)
            return node;
    }
    return NULL;
}
763
764
}