Lingoes的在线词典结果显示利用了浏览器的一个特性--持续显示,就是说对于浏览器来说,有多少HTML就显示多少,边显示边下载后续HTML。从使用的感觉上看,Lingoes显示结果时,会先看到本地词典结果,然后窗口变化显示在线词典结果;而从实现上看,就是先输出本地文件的HTML到浏览器,等在线词典的结果到达后再继续输出在线词典的HTML,于是就可以看到很多词典内容了。
这个方法很好,如果网速好,使用起来根本感觉不到是在使用网络数据。但此方法对于LingosHook来说有时就是灾难。举例说,当Hook发现有结果显示时,马上抓取输送到浏览器的HTML数据,并解析结果,如果网络不够好,那么只有本地数据,如果网络足够好,那么就有全数据,但如果网络不好不坏的情况下,Hook就会抓到“破碎”数据,导致解析失败。这也是有时Hook无法抓取结果的原因--解析HTML数据出错。
为解决此问题有两种方案,一是提高HTML解析的容错能力,二是等待HTML“全”数据到达后再解析。对于第二种方法来说,当前还不太可行,一来当前还没有截取到“全”数据到达的事件,二来如果网络不好,等待时间过长,反而会增加“丢失数据”的几率。因此,目前的法子就是提高Hook对HTML的容错能力。
从分析结果看,“破碎”数据不等于“混乱”数据,就是说HTML只是缺失,且只是缺失连续数据的后半段,而不是Tag混乱。例如下面数据就是所谓的“破碎”数据例子:
<HTML><BODY>this is context.</BODY>
<HTML><BODY>this is context.</B
<HTML><BODY>this is
因此针对此情况,修改了TinyHtmlParser,增加了一个PreParserBroken()函数,以实现对以上情况的支持。代码如下。
1#ifndef __TINYHTMLPARSER_H__
2#define __TINYHTMLPARSER_H__
3
4#include <iostream>
5#include <string>
6#include <queue>
7#include <stack>
8
9namespace TinyHtmlParser
10{
11
/// Classification assigned to a CElementObject.
enum ElementType
{
    ET_UNKNOWN = -1, ///< not classified
    ET_TAG = 0,      ///< just a tag
    ET_NODE,         ///< no value
    ET_ELEMENT       ///< have value
};
13
/// A single attribute (name / value pair) of an element.
/// Attributes belonging to one element are chained through `next`.
class CAttributeObject
{
public:
    /// Stores copies of the name `a` and the value `v`; the node starts
    /// unlinked (`next` is NULL).
    CAttributeObject(const std::wstring& a, const std::wstring& v)
        : attr(a)
        , value(v)
        , next(NULL)
    {
    }

    virtual ~CAttributeObject()
    {
    }

    /// Writes this attribute to `os`.
    void Show(std::wostream& os) const;

public:
    std::wstring attr;      ///< attribute name
    std::wstring value;     ///< attribute value
    CAttributeObject* next; ///< following attribute, or NULL for the last one
};
29
30class CElementObject
31{
32public:
33 CElementObject();
34 virtual ~CElementObject();
35
36 virtual int Analyse();
37
38 const CAttributeObject* FindAttribute(const std::wstring& attr) const;
39
40 void Show(std::wostream& os) const;
41protected:
42 int AnalyseAttribute(const std::wstring& attr);
43 int MakeAttribute(const std::wstring& attr);
44 int MakeAttribute(const std::wstring& attr, const std::wstring& value);
45 void FreeAnalyseAttribute();
46 int AnalyseValue();
47public:
48 ElementType type;
49 size_t level;
50 CElementObject* parent;
51 CElementObject* child;
52 CElementObject* sibling;
53
54 CAttributeObject* attrib;
55public:
56 std::wstring tag;
57 std::wstring value;
58};
59
/// A half-parsed span of the HTML text, used while tokenising.
///
/// `start` / `end` are indexes into the HTML string delimiting the span.
/// `vstart` / `vend` delimit text that was seen inside a tag before one of
/// its child tags; they are only meaningful for DT_TAG_VALUE entries.
class CParserData
{
public:
    enum DataType { DT_UNKNOWN = -1, DT_TAG = 0, DT_VALUE, DT_END, DT_DONE, DT_TAG_VALUE, DT_BROKEN };
public:
    /// Every field is zero-initialised so that a default-constructed object
    /// (for example TNodeData::value of a node without text) never exposes
    /// indeterminate positions.
    CParserData()
    : type(DT_UNKNOWN)
    , start(0)
    , end(0)
    , vstart(0)
    , vend(0)
    {
    }
    virtual ~CParserData() {}

public:
    DataType type;
    size_t start;
    size_t end;
    size_t vstart;
    size_t vend;
};
80
81class CDocumentObject
82{
83protected:
84 static const wchar_t TAG_LT = L'<';
85 static const wchar_t TAG_GT = L'>';
86 static const wchar_t TAG_SLASH = L'/';
87 static const wchar_t TAG_BSLASH = L'\\';
88 static const wchar_t TAG_AND = L'&';
89
90 typedef std::stack<CParserData> TDataStack;
91 struct TNodeData
92 {
93 size_t level;
94 CParserData tag;
95 CParserData value;
96// CParserData end;
97 };
98 typedef std::deque<TNodeData> TNodeQueue;
99public:
100 typedef std::stack<const CElementObject*> TElementStack;
101public:
102 CDocumentObject();
103 virtual ~CDocumentObject();
104
105 int Load(const std::wstring& str, bool strict = true);
106
107 const CElementObject* Root() const { return _root; }
108
109 const CElementObject* FindFirstElement(const std::wstring& tag);
110 const CElementObject* FindNextElement();
111
112 const CElementObject* FindFirstElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack);
113 const CElementObject* FindNextElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack);
114
115 const CAttributeObject* FindAttribute(const CElementObject* element, const std::wstring& attr);
116
117 bool IsMistake() const { return _bIsMistake; }
118
119 void Show(std::wostream& os) const;
120protected:
121 int PreProcess(const std::wstring& str, std::wstring& html, bool strict);
122 int PreParser(const std::wstring& html, TNodeQueue& que, bool strict);
123 int Parser(const std::wstring& html, TNodeQueue& que, bool strict);
124private:
125 int PreParserLT(const std::wstring& html, std::wstring::size_type& pos, CParserData& data);
126 int PushValueData(const CParserData& data, TDataStack& datastack) const;
127 int PushTagData(const std::wstring& html, const CParserData& data, TDataStack& datastack, TNodeQueue& nodeque) const;
128 int PreParserBroken(const std::wstring& html, TDataStack& datastack, TNodeQueue& nodeque) const;
129
130 int CheckSpecialTag(const std::wstring& html, const CParserData& data) const;
131 int CheckTag(const std::wstring& html, const CParserData& tag, const CParserData& end) const;
132 CElementObject* MakeElement(const std::wstring& html, const TNodeData& node, CElementObject* parent, CElementObject* sibling) const;
133
134 void CDocumentObject::ShowElement(std::wostream& os, const CElementObject* e) const;
135
136 void FreeElement(CElementObject* root);
137
138 const CElementObject* FindElement(const CElementObject* root, const CElementObject* pe, const std::wstring& tag, TElementStack& stack);
139private:
140 CElementObject* _root;
141private:
142 std::wstring _findtag;
143 TElementStack _findstack;
144private:
145 bool _bIsMistake;
146};
147
148}
149
150#endif
151
1
2#include "TinyHtmlParser.h"
3
4namespace TinyHtmlParser
5{
6
7void CAttributeObject::Show(std::wostream& os) const
8{
9 os << " attr : " << this->attr << " -- value = " << this->value << std::endl;
10}
11
12CElementObject::CElementObject()
13: type(ET_UNKNOWN)
14, level(0)
15, parent(NULL)
16, child(NULL)
17, sibling(NULL)
18, attrib(NULL)
19{
20}
21
22CElementObject::~CElementObject()
23{
24 FreeAnalyseAttribute();
25}
26
27int CElementObject::Analyse()
28{
29 std::wstring str = tag;
30
31 std::wstring::size_type pos = str.find(L" ");
32 if(pos != std::wstring::npos)
33 {
34 tag = str.substr(0, pos);
35
36 str = str.substr(pos + 1);
37 if(AnalyseAttribute(str) != 0)
38 {
39 return -1;
40 }
41 }
42 if(type == ET_ELEMENT)
43 {
44 if(AnalyseValue() != 0)
45 return -1;
46 }
47 return 0;
48}
49
50int CElementObject::AnalyseAttribute(const std::wstring& attr)
51{
52 if(attr.size() == 0)
53 return 0;
54
55 std::wstring a, v;
56 std::wstring::size_type pos = attr.find(L"="), start = 0;
57 while(pos != std::wstring::npos)
58 {
59 a = attr.substr(start, pos - start);
60 if(pos == attr.size() - 1)
61 return -1;
62 start = pos + 1;
63 if(attr[pos + 1] == L'\"')
64 {
65 pos = attr.find(L"\"", start + 1);
66 if(pos == std::wstring::npos)
67 return -1;
68 v = attr.substr(start, pos - start + 1);
69 start = pos + 2;
70 }
71 else
72 {
73 pos = attr.find(L" ", start);
74 if(pos == std::wstring::npos)
75 pos = attr.size();
76 v = attr.substr(start, pos - start);
77 start = pos + 1;
78 }
79 if(MakeAttribute(a, v) != 0)
80 return -1;
81
82 if(start >= attr.size())
83 break;
84
85 pos = attr.find(L"=", start);
86 }
87 return 0;
88}
89
90int CElementObject::MakeAttribute(const std::wstring &attr)
91{
92 std::wstring::size_type pos = attr.find(L"=");
93 if(pos == std::wstring::npos)
94 return -1;
95
96 return MakeAttribute(attr.substr(0, pos), attr.substr(pos));
97}
98
99int CElementObject::MakeAttribute(const std::wstring &attr, const std::wstring& value)
100{
101 std::auto_ptr<CAttributeObject> obj(new CAttributeObject(attr, value));//attr.substr(0, pos), attr.substr(pos)));
102
103 if(attrib != NULL)
104 {
105 CAttributeObject* tmp = attrib;
106 while(tmp->next != NULL)
107 tmp = tmp->next;
108 tmp->next = obj.release();
109 }
110 else
111 {
112 attrib = obj.release();
113 }
114 return 0;
115}
116
117
118void CElementObject::FreeAnalyseAttribute()
119{
120 CAttributeObject* tmp = attrib;
121 while(attrib != NULL)
122 {
123 tmp = attrib->next;
124 delete attrib;
125 attrib = tmp;
126 }
127
128}
129
130int CElementObject::AnalyseValue()
131{
132 std::wstring::size_type pos = this->value.find(L" ");
133 while(pos != std::wstring::npos)
134 {
135 this->value.replace(pos, 6, L" ");
136 pos = this->value.find(L" ", pos + 1);
137 }
138
139 return 0;
140}
141
142const CAttributeObject* CElementObject::FindAttribute(const std::wstring& attr) const
143{
144 const CAttributeObject* pa = this->attrib;
145 while(pa != NULL)
146 {
147 if(pa->attr == attr)
148 return pa;
149 pa = pa->next;
150 }
151 return pa;
152}
153
154void CElementObject::Show(std::wostream& os) const
155{
156 os << "[" << this->level << "]" << "Tag : " << this->tag;
157 if(this->type == ET_ELEMENT)
158 os << " -- value = " << /**//*std::wstring*/(this->value);
159 os << std::endl;
160
161 const CAttributeObject* attr = this->attrib;
162 while(attr != NULL)
163 {
164 attr->Show(os);
165 attr = attr->next;
166 }
167 os << std::endl;
168}
169//
170
171CDocumentObject::CDocumentObject()
172: _root(NULL)
173, _bIsMistake(false)
174{
175}
176
177CDocumentObject::~CDocumentObject()
178{
179 if(_root != NULL)
180 FreeElement(_root);
181}
182
183int CDocumentObject::Load(const std::wstring &str, bool strict)
184{
185 std::wstring html;
186 if(PreProcess(str, html, strict) != 0)
187 return -1;
188 TNodeQueue que;
189 if(PreParser(html, que, strict) != 0)
190 return -1;
191 if(Parser(html, que, strict) != 0)
192 return -1;
193 return 0;
194}
195
196int CDocumentObject::PreProcess(const std::wstring& str, std::wstring& html, bool strict)
197{
198 //html = str;
199 bool tag = false;
200 for(std::wstring::const_iterator it = str.begin(); it != str.end(); ++ it)
201 {
202 if(*it == TAG_LT)
203 {
204 if(tag == true)
205 return -1;
206 tag = true;
207 }
208 else if(*it == TAG_GT)
209 {
210 if(tag == false)
211 return -1;
212 tag = false;
213 }
214 else
215 {
216 if(tag == false)
217 {
218 if(isspace((unsigned char)*it) != 0)
219 continue;
220 }
221 }
222 html += *it;
223 }
224
225 return 0;
226}
227
228int CDocumentObject::PreParser(const std::wstring& html, CDocumentObject::TNodeQueue& que, bool strict)
229{
230 std::wstring::size_type pos = 0;
231
232 if(html.size() == 0)
233 return -1;
234 if(html[pos] != TAG_LT)
235 return -1;
236
237 TDataStack datastack;
238
239 CParserData data;
240
241 while(pos < html.size())
242 {
243 if(html[pos] == TAG_LT)
244 {
245 if(pos > data.start)
246 {
247 data.type = CParserData::DT_VALUE;
248 data.end = pos;
249
250 if(PushValueData(data, datastack) != 0)
251 return -1;
252 }
253
254 if(PreParserLT(html, pos, data) != 0)
255 break;
256 if(PushTagData(html, data, datastack, que) != 0)
257 return -1;
258
259 ++ pos;
260 data.start = pos;
261 }
262 //else if(html[pos] == TAG_GT || html[pos] == TAG_SLASH)
263 //{
264 // return -1;
265 //}
266 else
267 {
268 ++ pos;
269 }
270 }
271
272 if(datastack.size() > 0)
273 {
274 if(strict)
275 return -1;
276
277 if(pos > data.start)
278 {
279 data.type = CParserData::DT_BROKEN;
280 data.end = pos;
281
282 if(PushValueData(data, datastack) != 0)
283 return -1;
284 }
285
286 if(PreParserBroken(html, datastack, que) != 0)
287 return -1;
288 }
289
290 return 0;
291}
292
293int CDocumentObject::Parser(const std::wstring& html, CDocumentObject::TNodeQueue& que, bool strict)
294{
295 CElementObject *pe = NULL, *pp = NULL, *ps = NULL;
296 size_t level = 0;
297 while(que.size()> 0)
298 {
299 const TNodeData &node = que.front();
300 if(level < que.front().level)
301 {
302 pp = pe;
303 ps = NULL;
304 }
305 else if(level == que.front().level)
306 {
307 ps = pe;
308 }
309 else//>
310 {
311 ps = pe;
312 pp = pe->parent;
313 int t = level - que.front().level;
314 while(t > 0)
315 {
316 ps = ps->parent;
317 pp = pp->parent;
318 -- t;
319 }
320 }
321 level = que.front().level;
322
323 pe = MakeElement(html, que.front(), pp, ps);
324
325 if(pe == NULL)
326 return -1;
327
328 que.pop_front();
329 }
330
331 if(pp != NULL)
332 {
333 while(pp->parent != NULL)
334 pp = pp->parent;
335 _root = pp;
336 }
337 else
338 _root = pe;
339
340 return 0;
341}
342
343int CDocumentObject::PreParserLT(const std::wstring& html, std::wstring::size_type& pos, CParserData& data)
344{
345 if(pos == html.size() - 1)
346 return -1;
347
348 data.start = pos;
349
350 ++ pos;
351
352 if(html[pos] != TAG_SLASH)
353 {
354 data.type = CParserData::DT_TAG;
355 }
356 else
357 {
358 data.type = CParserData::DT_END;
359 ++ pos;
360 }
361
362 while(pos < html.size())
363 {
364 if(html[pos] == TAG_GT)
365 {
366 if(html[pos - 1] == TAG_SLASH)
367 {
368 data.type = CParserData::DT_DONE;
369 }
370
371 data.end = pos;
372
373 return 0;
374 }
375 else if(html[pos] == TAG_LT)
376 {
377 return -1;
378 }
379
380 ++ pos;
381 }
382
383 return -1;
384}
385
386int CDocumentObject::PushValueData(const TinyHtmlParser::CParserData &data, CDocumentObject::TDataStack &datastack) const
387{
388 if(datastack.size() == 0)
389 return -1;
390 datastack.push(data);
391 return 0;
392}
393
394int CDocumentObject::PushTagData(const std::wstring& html, const CParserData& data, CDocumentObject::TDataStack& datastack, CDocumentObject::TNodeQueue& nodeque) const
395{
396 if(data.type == CParserData::DT_TAG)
397 {
398 if(CheckSpecialTag(html, data) == 0)
399 {
400 TNodeData node;
401 node.tag = data;
402
403 node.level = datastack.size();
404 nodeque.push_front(node);
405 return 0;
406 }
407
408 if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
409 {
410 CParserData data = datastack.top();
411 datastack.pop();
412 if(datastack.top().type != CParserData::DT_TAG && datastack.top().type != CParserData::DT_TAG_VALUE)//for special condition <t1>v1<t2>v2</t2>v11<t3>v3</t3>v111</t1>
413 return -1;
414 datastack.top().type = CParserData::DT_TAG_VALUE;
415 if(datastack.top().vstart == 0)
416 datastack.top().vstart = data.start;
417 datastack.top().vend = data.end;
418 }
419
420 datastack.push(data);
421 }
422 else if(data.type == CParserData::DT_END)
423 {
424 if(datastack.size() == 0)
425 return -1;
426
427 TNodeData node;
428 if(datastack.top().type == CParserData::DT_TAG || datastack.top().type == CParserData::DT_TAG_VALUE)
429 {
430 node.tag = datastack.top();
431 datastack.pop();
432 }
433 else if(datastack.top().type == CParserData::DT_VALUE)
434 {
435 node.value = datastack.top();
436
437 datastack.pop();
438
439 if(datastack.size() == 0)
440 return -1;
441
442 if(datastack.top().type == CParserData::DT_TAG)
443 {
444 node.tag = datastack.top();
445 }
446 else if(datastack.top().type == CParserData::DT_TAG_VALUE)
447 {
448 node.tag = datastack.top();
449 }
450 else
451 {
452 return -1;
453 }
454
455 //node.tag = datastack.top();
456 //else if(datastack.top().type == CParserData::DT_TAG_VALUE)
457 //{
458 // node.tag = datastack.top();
459 //}
460 datastack.pop();
461 }
462 else
463 {
464 return -1;
465 }
466
467 if(CheckTag(html, node.tag, data) != 0)
468 return -1;
469
470 node.level = datastack.size();
471 nodeque.push_front(node);
472 }
473 else if(data.type == CParserData::DT_DONE)
474 {
475 if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
476 {
477 CParserData data = datastack.top();
478 datastack.pop();
479 if(datastack.top().type != CParserData::DT_TAG)
480 return -1;
481 datastack.top().type = CParserData::DT_TAG_VALUE;
482 datastack.top().vstart = data.start;
483 datastack.top().vend = data.end;
484 }
485
486// datastack.push(data);
487
488 TNodeData node;
489 node.tag = data;
490
491 node.level = datastack.size();
492 nodeque.push_front(node);
493 }
494 else
495 {
496 return -1;
497 }
498 return 0;
499}
500
501int CDocumentObject::PreParserBroken(const std::wstring& html, TDataStack& datastack, TNodeQueue& nodeque) const
502{
503 while(datastack.size() > 0)
504 {
505 CParserData& data = datastack.top();
506 if(data.type == CParserData::DT_TAG || data.type == CParserData::DT_TAG_VALUE)
507 {
508 TNodeData node;
509 node.tag = data;
510
511 datastack.pop();
512
513 node.level = datastack.size();
514
515 nodeque.push_front(node);
516 }
517 else if(data.type == CParserData::DT_VALUE)
518 {
519 TNodeData node;
520 node.value = data;
521
522 datastack.pop();
523 data = datastack.top();
524
525 if(data.type == CParserData::DT_TAG || data.type == CParserData::DT_TAG_VALUE)
526 {
527 node.tag = data;
528 }
529 else
530 {
531 return -1;
532 }
533 datastack.pop();
534
535 node.level = datastack.size();
536
537 nodeque.push_front(node);
538 }
539 else if(data.type == CParserData::DT_BROKEN)
540 {
541 TNodeData node;
542 node.value = data;
543
544 datastack.pop();
545 data = datastack.top();
546
547 if(data.type == CParserData::DT_TAG || data.type == CParserData::DT_TAG_VALUE)
548 {
549 node.tag = data;
550 }
551 else if(data.type == CParserData::DT_VALUE)
552 {
553 continue;
554 }
555 else
556 {
557 return -1;
558 }
559 datastack.pop();
560
561 node.level = datastack.size();
562
563 nodeque.push_front(node);
564 }
565 }
566
567 return 0;
568}
569
570int CDocumentObject::CheckSpecialTag(const std::wstring& html, const CParserData& data) const
571{
572 std::wstring tag = html.substr(data.start + 1, data.end - data.start - 1);
573 std::wstring::size_type pos = tag.find(L" ");
574 if(pos != std::wstring::npos)
575 tag = tag.substr(0, pos);
576
577 if(tag == L"IMG")
578 return 0;
579
580 return -1;
581}
582
583int CDocumentObject::CheckTag(const std::wstring& html, const CParserData& tag, const CParserData& end) const
584{
585 std::wstring str = html.substr(tag.start + 1, tag.end - tag.start - 1);
586 std::wstring::size_type pos = str.find(L" ");
587 if(pos != std::wstring::npos)
588 str = str.substr(0, pos);
589
590 if(str != html.substr(end.start + 2, end.end - end.start - 2))
591 {
592 return -1;
593 }
594 return 0;
595}
596
597CElementObject* CDocumentObject::MakeElement(const std::wstring& html, const CDocumentObject::TNodeData &node, CElementObject *parent, CElementObject *sibling) const
598{
599 std::auto_ptr<CElementObject> ele(new CElementObject);
600
601 ele->level = node.level;
602
603 if(node.tag.type == CParserData::DT_TAG)
604 {
605 ele->type = ET_NODE;
606 ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 1);
607 }
608 else if(node.tag.type == CParserData::DT_DONE)
609 {
610 ele->type = ET_TAG;
611 ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 2);
612 }
613 else if(node.tag.type == CParserData::DT_TAG_VALUE)
614 {
615 ele->tag = ET_NODE;
616 ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 1);
617 }
618 else
619 return NULL;
620
621 if(node.value.type == CParserData::DT_VALUE)
622 {
623 ele->type = ET_ELEMENT;
624 if(node.tag.type == CParserData::DT_TAG)
625 ele->value = html.substr(node.value.start, node.value.end - node.value.start);
626 else
627 ele->value = html.substr(node.tag.vstart, node.tag.vend - node.tag.vstart) + L"%" + html.substr(node.value.start, node.value.end - node.value.start);
628 }
629
630 if(ele->Analyse() != 0)
631 {
632 return NULL;
633 }
634
635 if(parent != NULL)
636 parent->child = ele.get();
637 ele->parent = parent;
638 ele->sibling = sibling;
639
640 return ele.release();
641}
642
643void CDocumentObject::Show(std::wostream &os) const
644{
645 if(_root != NULL)
646 ShowElement(os, _root);
647}
648
649void CDocumentObject::ShowElement(std::wostream& os, const CElementObject* e) const
650{
651 const CElementObject* pe = e, *ps = e->sibling;
652
653 pe->Show(os);
654
655 pe = pe->child;
656 if(pe != NULL)
657 {
658 ShowElement(os, pe);
659 }
660 if(ps != NULL)
661 {
662 ShowElement(os, ps);
663 }
664}
665
666void CDocumentObject::FreeElement(CElementObject* root)
667{
668 CElementObject* pe = root->child, *ps = root->sibling;
669
670// std::cout << "free:" << root->tag << std::endl;
671
672 if(root != NULL)
673 {
674 delete root;
675 root = NULL;
676 }
677
678 if(pe != NULL)
679 {
680 FreeElement(pe);
681 }
682 if(ps != NULL)
683 {
684 FreeElement(ps);
685 }
686}
687
688const CElementObject* CDocumentObject::FindFirstElement(const std::wstring &tag)
689{
690 if(_root == NULL)
691 return NULL;
692
693 _findtag = tag;
694 while(!_findstack.empty())
695 _findstack.pop();
696
697 return FindElement(NULL, _root, _findtag, _findstack);
698}
699
700const CElementObject* CDocumentObject::FindNextElement()
701{
702 if(_findstack.empty())
703 return NULL;
704
705 return FindElement(NULL, _findstack.top()->child, _findtag, _findstack);
706}
707
708const CElementObject* CDocumentObject::FindFirstElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack)
709{
710 if(element == NULL)
711 return NULL;
712
713 while(!tmpstack.empty())
714 tmpstack.pop();
715
716 return FindElement(element, element, tag, tmpstack);
717}
718
719const CElementObject* CDocumentObject::FindNextElement(const CElementObject* element, const std::wstring& tag, TElementStack& tmpstack)
720{
721 if(tmpstack.empty())
722 return NULL;
723
724 return FindElement(element, tmpstack.top()->child, tag, tmpstack);
725}
726
727const CElementObject* CDocumentObject::FindElement(const CElementObject* root, const CElementObject* pe, const std::wstring& tag, TElementStack& stack)
728{
729 while(pe != NULL)
730 {
731 stack.push(pe);
732 if(pe->tag == tag)
733 return pe;
734 pe = pe->child;
735 }
736
737 while(!stack.empty() && stack.top() != root && pe == NULL)
738 {
739 pe = stack.top()->sibling;
740 stack.pop();
741 }
742
743 if(pe == NULL)
744 return NULL;
745
746 return FindElement(root, pe, tag, stack);
747}
748
749const CAttributeObject* CDocumentObject::FindAttribute(const TinyHtmlParser::CElementObject *element, const std::wstring &attr)
750{
751 if(element == NULL)
752 return NULL;
753
754 const CAttributeObject* pa = element->attrib;
755 while(pa != NULL)
756 {
757 if(pa->attr == attr)
758 return pa;
759 pa = pa->next;
760 }
761 return pa;
762}
763
764}