近来LingosHook更新很慢,是因为比较忙,当然是工作了。。。(我就不说我的夜生活很丰富。。。)
找到了一个HTML重复分析的问题:Tidy和PreProcess两个过程存在重复操作,导致Dict数据定位不准确。终于还是让我‘想起来’了……这样下个版本应该快了~
这里贴一下HtmlDictParser对象。此对象用于分析HTML中的Dict数据,与DictObject的区别在于:它只分析Dict本身的数据(如ID)和单词数据,不再像DictObject对象那样分析具体词典的结果数据。HtmlDictParser是LingosHook摆脱Dict限制的主要对象。
#ifndef __HTMLDICTPARSER_H__
#define __HTMLDICTPARSER_H__

#include <map>
#include <vector>

#include "wx/wx.h"

#include "DBAccess.h"
#include "TinyHtmlParser.h"

namespace HtmlDictParser


{

/// Pair of integer parameters attached to one dictionary.
/// NOTE(review): what the two values control is decided by the implementation
/// file, which is not visible from this header — confirm before relying on it.
struct TDictConfig {
    int m_iLoadParam;   ///< Parameter named for loading (exact meaning not shown here).
    int m_iStoreParam;  ///< Parameter named for storing (exact meaning not shown here).
};

/// Associative table: key is an index, value is the matching configuration.
typedef std::map<int, TDictConfig> TDictConfigMap;

/// Record describing a single dictionary: its ID string, its title and its
/// configuration values.
struct TDictInfo {
    std::wstring m_strDictID;  ///< Dictionary identifier string.
    std::wstring m_strTitle;   ///< Dictionary title.

    TDictConfig  m_stConfig;   ///< Configuration values for this dictionary.
};

/// Lookup table: key is the dictionary ID string, value is the dictionary index.
typedef std::map<std::wstring, int> TDictIDMap;
/// Lookup table: key is the dictionary index, value is the dictionary record.
typedef std::map<int, TDictInfo> TDictIndexMap;

/// Registry of dictionaries, kept in two maps: one keyed by dictionary ID
/// string and one keyed by dictionary index.
///
/// All member functions are only declared here; their return-value convention
/// is set by the implementation file (not visible from this header).
class CDictInfoObject {
public:
    CDictInfoObject() {}
    virtual ~CDictInfoObject() {}

    /// Prepares the registry using the given database handle.
    /// NOTE(review): presumably loads previously stored dictionary records — confirm.
    int Init(CDBAccess::TDatabase& db);

    /// Registers the record `info` under the dictionary index `index`.
    int Insert(int index, const TDictInfo& info);

    /// Looks up the dictionary index associated with the ID string `id`.
    /// NOTE(review): the value returned for an unknown ID is not visible here.
    int GetDictIndex(const std::wstring& id) const;

protected:
    TDictIDMap    _mapDictID;     ///< Dictionary ID string -> dictionary index.
    TDictIndexMap _mapDictIndex;  ///< Dictionary index -> dictionary record.
};

/// Location of one dictionary's portion within a parsed result.
/// NOTE(review): start/end look like positions inside the HTML text; whether
/// the end is inclusive is not visible from this header — confirm.
struct TDictResult {
    int m_iDictIndex;  ///< Index of the dictionary this entry belongs to.

    int m_iDictStart;  ///< Start position of the dictionary's data.
    int m_iDictEnd;    ///< End position of the dictionary's data.
};

/// Sequence of dictionary result locations.
typedef std::vector<TDictResult> TDictResultVector;
/// Result locations grouped under a wide-string key.
typedef std::map<std::wstring, TDictResultVector> TDictResultMap;


/// Parser interface for the dictionary sections of an HTML result page.
///
/// Every member function is only declared here; the meaning of the `int`
/// return values is set by the implementation file (not visible from this
/// header). All public operations are virtual, so subclasses may override them.
class CParser {
public:
    CParser() {}
    virtual ~CParser() {}

    /// Prepares the parser using the given database handle.
    virtual int Init(CDBAccess::TDatabase& db);

    /// Parses the text `html` and reports the located dictionary sections
    /// through `result`.
    virtual int ParserHTML(const std::wstring& html, TDictResultVector& result);

    /// Parses the section of one dictionary, identified by `dictid`, given the
    /// source text `html`, the parsed document `doc` and the element `dict`;
    /// results are reported through `result`.
    /// NOTE(review): presumably `dict` is the element inside `doc` that holds
    /// this dictionary's data — confirm against the implementation.
    virtual int ParserHTML(CDBAccess::TDatabase &db,
                           const std::wstring& dictid,
                           const std::wstring& html,
                           TinyHtmlParser::CDocumentObject& doc,
                           const TinyHtmlParser::CElementObject* dict,
                           TDictResultMap& result);

    /// Stores `result` in the database for the word identified by `wordid`.
    virtual int SaveResult(CDBAccess::TDatabase& db, int wordid, const TDictResultMap& result);
    /// Fetches into `result` what is stored for the word identified by `wordid`.
    virtual int GetResult(CDBAccess::TDatabase& db, int wordid, TDictResultMap& result);
    /// Removes what is stored for the word identified by `wordid`.
    virtual int RemoveResult(CDBAccess::TDatabase& db, int wordid);

    /// Produces output text in `result` from the locations in `vct` and the
    /// source text `html`.
    virtual int GenHtmlResult(const TDictResultVector& vct, const std::wstring& html, wxString& result) const;

protected:
    /// NOTE(review): purpose not visible from this header; name suggests a
    /// validity check on dictionary HTML — confirm.
    int CheckDictHtml();

    /// Updates the stored information for dictionary `dictid` from the parsed
    /// document.
    int UpdateDictInfo(CDBAccess::TDatabase &db,
                       const std::wstring& dictid,
                       const std::wstring& html,
                       TinyHtmlParser::CDocumentObject& doc,
                       const TinyHtmlParser::CElementObject* dict);

    /// Updates the stored information for dictionary `dictid` with the given
    /// `title`.
    int UpdateDictInfo(CDBAccess::TDatabase &db, const std::wstring& dictid, const std::wstring& title);

    CDictInfoObject _objDictInfo;  ///< Registry of dictionaries used by this parser.
};

}

#endif
