TinyHtmlParser更新完成。为了方便分解HTML,又新增了一个元素类型——ET_VALUE。此类型将HTML的TAG与其VALUE分离开,使VALUE成为独立的元素。当然,为了不改动原有的代码,在分解HTML数据时,是否将VALUE作为单独元素处理是可以选择的(见下面Load的valueseparate参数,默认为false)。
int Load(const std::wstring& str, bool strict = true, bool valueseparate = false);
实际上,TinyHtmlParser最早就是这个样子的,只是后来为了让TAG与VALUE有所关联,才将VALUE放到了TAG对象内部。唉,写代码有时也挺纠结的……
这次主要说明一下新增的CDocumentOutputObject类。此类用于输出HTML分解后CDocumentObject中的数据,而且输出方式非常灵活。比如,可以指定某些或者全部TAG、VALUE或者ATTRIB不输出。
class CDocumentOutputObject
data:image/s3,"s3://crabby-images/f86b7/f86b7e502a0580d5e24db72fe38f81dda2bc052d" alt=""
data:image/s3,"s3://crabby-images/3ee79/3ee79ec5a9b7f3dd33bbbdc97980715db1aa9f00" alt=""
{
protected:
typedef std::set<std::wstring> TKeySet;
typedef std::stack<std::wstring> TTagStack;
public:
data:image/s3,"s3://crabby-images/db282/db282e9ea79ad6a7617774c9b676a45b33d46480" alt=""
enum KeyType
{ KT_TAG = 0, KT_VALUE, KT_ATTRIB, KT_ALL_TAG, KT_ALL_VALUE, KT_ALL_ATTRIB };
typedef std::map<KeyType, TKeySet> TKeyMap;
public:
static void AddKey(TKeyMap* keymap, KeyType type, const wxString& str = wxEmptyString);
static void RemoveKey(TKeyMap* keymap, KeyType type, const wxString& str = wxEmptyString);
static bool IsKey(const TKeyMap* keymap, KeyType type, const wxString& str);
data:image/s3,"s3://crabby-images/6c6b8/6c6b84e662455f8092d9c42e3a86036cd3a28be1" alt=""
static int Rewrite(const CDocumentObject& doc, wxString& ostr, const TKeyMap* exclude = NULL);
protected:
static void RewriteElement(wxString& ostr, const TinyHtmlParser::CElementObject* root, const CElementObject* e, TTagStack& tagstack, const TKeyMap* exclude);
static bool IsKey(const TKeyMap* keymap, KeyType type, const std::wstring& str);
private:
static void RewriteTag(wxString& ostr, const CElementObject* e, TTagStack& tagstack, const TKeyMap* exclude);
static void RewriteTagEnd(wxString& ostr, const CElementObject* e, TTagStack& tagstack, const TKeyMap* exclude);
static void RewriteAttrib(wxString& ostr, const CElementObject* e, const TKeyMap* exclude);
static void RewriteValue(wxString& ostr, const CElementObject* e, const TKeyMap* exclude);
}; 当所有的TAG不输出时,是不是我们就可以得到HTML中的存TEXT数据了?
// Example: exclude every tag and every attribute so that, as intended by the
// article, only the value text of the document ends up in `ret`.
// `doc` is a CDocumentObject obtained elsewhere (not shown in this snippet).
wxString ret;
CDocumentOutputObject::TKeyMap exclude;
// Alternative (disabled): exclude only the tag named "IMG".
//CDocumentOutputObject::AddKey(&exclude, CDocumentOutputObject::KT_TAG, wxT("IMG"));
CDocumentOutputObject::AddKey(&exclude, CDocumentOutputObject::KT_ALL_TAG);
// Disabled: values are deliberately kept in the output.
//CDocumentOutputObject::AddKey(&exclude, CDocumentOutputObject::KT_ALL_VALUE);
CDocumentOutputObject::AddKey(&exclude, CDocumentOutputObject::KT_ALL_ATTRIB);

// The int result of Rewrite is not inspected here.
CDocumentOutputObject::Rewrite(doc, ret, &exclude);
哈哈。。。有时我觉得我真的很强力啊。。。(低调,低调。。。哈哈。。。)