近来LingosHook更新很慢,是因为比较忙,当然是工作了。。。(我就不说我的夜生活很丰富。。。)
找到一个HTML分析重复的问题,就是Tidy和PreProcess两个过程有重复操作,导致Dict数据定位不准确,终于还是让我‘想起来’了。。。这样下个版本应该快了~
这里贴一下HtmlDictParser对象。此对象用于分析HTML中的Dict数据,它与DictObject的区别在于:除了Dict本身的数据(如ID)和单词数据之外,它不再像DictObject对象那样分析具体的词典结果数据。HtmlDictParser是LingosHook摆脱Dict限制的主要对象。
#ifndef __HTMLDICTPARSER_H__
#define __HTMLDICTPARSER_H__
#include <map>
#include <string>
#include <vector>
#include "wx/wx.h"
#include "DBAccess.h"
#include "TinyHtmlParser.h"
namespace HtmlDictParser
{
struct TDictConfig
{
int m_iLoadParam;
int m_iStoreParam;
};
typedef std::map<int, TDictConfig> TDictConfigMap;//index + config
struct TDictInfo
{
std::wstring m_strDictID;
std::wstring m_strTitle;
TDictConfig m_stConfig;
};
typedef std::map<std::wstring, int> TDictIDMap;//dictid + dictindex
typedef std::map<int, TDictInfo> TDictIndexMap;//dictindex + info
class CDictInfoObject
{
public:
CDictInfoObject() {}
virtual ~CDictInfoObject() {}
int Init(CDBAccess::TDatabase& db);
int Insert(int index, const TDictInfo& info);
int GetDictIndex(const std::wstring& id) const;
protected:
TDictIDMap _mapDictID;
TDictIndexMap _mapDictIndex;
};
struct TDictResult
{
int m_iDictIndex;
int m_iDictStart;
int m_iDictEnd;
};
typedef std::vector<TDictResult> TDictResultVector;
typedef std::map<std::wstring, TDictResultVector> TDictResultMap;
class CParser
{
public:
CParser() {}
virtual ~CParser() {}
virtual int Init(CDBAccess::TDatabase& db);
virtual int ParserHTML(const std::wstring& html, TDictResultVector& result);
virtual int ParserHTML(CDBAccess::TDatabase &db, const std::wstring& dictid, const std::wstring& html, TinyHtmlParser::CDocumentObject& doc, const TinyHtmlParser::CElementObject* dict, TDictResultMap& result);
virtual int SaveResult(CDBAccess::TDatabase& db, int wordid, const TDictResultMap& result);
virtual int GetResult(CDBAccess::TDatabase& db, int wordid, TDictResultMap& result);
virtual int RemoveResult(CDBAccess::TDatabase& db, int wordid);
virtual int GenHtmlResult(const TDictResultVector& vct, const std::wstring& html, wxString& result) const;
protected:
int CheckDictHtml();
int UpdateDictInfo(CDBAccess::TDatabase &db, const std::wstring& dictid, const std::wstring& html, TinyHtmlParser::CDocumentObject& doc, const TinyHtmlParser::CElementObject* dict);
int UpdateDictInfo(CDBAccess::TDatabase &db, const std::wstring& dictid, const std::wstring& title);
protected:
CDictInfoObject _objDictInfo;
};
}
#endif