最近程序中用到了天气预报功能,于是在网上用百度查了一些参考资料(
http://g.kehou.com/t1029846752.html)。
根据资料中的提示,写了个程序,用于抓取所有省份及其所属城市的名称和代码,并写入到 c:\CityList.xml 文件中。
代码如下:
1 /********************************************************************
2 created: 2013/06/11
3 created: 11:6:2013 19:11
4 filename: d:\HETU\Test\WIFetch\WIFetch\main.cpp
5 file path: d:\HETU\Test\WIFetch\WIFetch
6 file base: main
7 file ext: cpp
8 author: Fanze
9
10 purpose: 从weather.com.cn抓取城市名称和代码信息
11 *********************************************************************/
12
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <Windows.h>
16 #include <WinInet.h>
17 #include <vector>
18 #include "ZCharSetUtil.h"
19 using namespace std ;
20
21 #pragma comment(lib ,"Wininet")
22
/// One name/code record.
/// Both members are fixed-size character buffers; ParseItem() in this file
/// fills them from a single "code|name" item.
struct NCInfo
{
    char Name[32];  ///< Text found after the '|' separator of an item.
    char Code[32];  ///< Text found before the '|' separator of an item.
};

/// List of NCInfo pointers. The records are allocated with `new` by Parse()
/// and released with FreeNCVt().
typedef std::vector<NCInfo*> VTNCInfo;
typedef VTNCInfo::iterator VTNCInfoIT;

/// A province-level record plus the list of city records fetched for it.
struct SLInfo : public NCInfo
{
    VTNCInfo VTCityInfo;  ///< City records belonging to this province.
};

/// List of SLInfo pointers, one per province.
typedef std::vector<SLInfo*> VTSLInfo;
typedef VTSLInfo::iterator VTSLInfoIT;
38
/// Result code returned by InetDownFile().
enum TASK_DOWNRET
{
    TASK_DOWN_OK     = 0,  ///< The download completed.
    TASK_DOWN_FAIL   = 1,  ///< Bad arguments or a WinINet call failed.
    TASK_DOWN_CANCEL = 2   ///< Not produced by any code visible in this file.
};
45
/// Markers that ProcReplyContent() searches for: the payload is the text
/// between the first pStartToken and the next pEndToken.
const char* pStartToken = "line-content\">";
const char* pEndToken = "</td>";

/// Fixed prologue written at the top of the generated XML file.
const char* g_strXMLHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n";
const char* g_strRootTag = "<list>\r\n";

/// Base URL; ParseSLCityInfo() appends "city<code>.xml?level=2" to it.
const char* g_strCityListURL = "http://www.weather.com.cn/data/list3/";

/// URL of the province-level list downloaded by main().
/// Declared `const char*`: a string literal must not be bound to a mutable
/// `char*` (ill-formed since C++11, and writing through it is undefined).
static const char* g_strSLURL = "http://www.weather.com.cn/data/list3/city.xml?level=1";
54
55 ///< 利用wininet下载文件
56 static inline TASK_DOWNRET InetDownFile(const TCHAR* strSoureURL ,unsigned char* pResultBuf ,int nResultBufLen ,int* ResultLen)
57 {
58 HINTERNET hInternet ;
59 HINTERNET hURLInternet ;
60 BYTE Buf[2048] ={0} ;
61 DWORD dwReads = 0 ;
62 DWORD dwTotal = 0 ;
63 int nSize = 0 ;
64 TASK_DOWNRET Ret = TASK_DOWN_OK ;
65
66 if(strSoureURL == NULL || pResultBuf == NULL || ResultLen == 0){
67 return TASK_DOWN_FAIL ;
68 }
69
70 ///< 删除URL缓存,总是从服务器下载最新的文件。
71 DeleteUrlCacheEntry(strSoureURL) ;
72
73 hInternet = InternetOpen(NULL ,INTERNET_OPEN_TYPE_DIRECT ,
74 NULL ,NULL ,0) ;
75 if(hInternet == NULL){
76 return TASK_DOWN_FAIL ;
77 }
78
79 hURLInternet = InternetOpenUrl(hInternet ,strSoureURL ,NULL ,0
80 ,INTERNET_FLAG_EXISTING_CONNECT|INTERNET_FLAG_NO_CACHE_WRITE ,NULL) ;
81 if(hURLInternet == NULL)
82 {
83 InternetCloseHandle(hInternet) ;
84 DWORD dwErr = GetLastError() ;
85 return TASK_DOWN_FAIL ;
86 }
87
88 while(InternetReadFile(hURLInternet ,Buf ,2048 ,&dwReads))
89 {
90 if(dwReads == 0){
91 Ret = TASK_DOWN_OK ;
92 break ;
93 }
94 nSize = ((dwTotal + dwReads) > (nResultBufLen - dwTotal)) ? (nResultBufLen - dwTotal) : dwReads ;
95 memcpy(pResultBuf + dwTotal ,Buf ,dwReads) ;
96 dwTotal += dwReads ;
97 }
98
99 *ResultLen = dwTotal ;
100
101 InternetCloseHandle(hURLInternet) ;
102 InternetCloseHandle(hInternet) ;
103 return Ret ;
104 }
105
106 void ParseItem(char* str ,NCInfo* pInfo)
107 {
108 char* pPos = str ;
109 char* pSave = NULL ;
110 char Temp[32] ={0} ;
111 if(str == NULL || pInfo == NULL)
112 return ;
113
114 pSave = pInfo->Code ;
115 while(*pPos != '\0')
116 {
117 if(*pPos =='|')
118 {
119 pInfo->Code[pInfo->Code - pSave] ;
120 pSave = pInfo->Name ;
121 }else
122 {
123 *pSave = *pPos ;
124 pSave++ ;
125 }
126 pPos++ ;
127 }
128 UTF82ASCII(pInfo->Name ,Temp) ;
129 printf("Name:%s \r\n" ,Temp) ;
130 UTF82ASCII(pInfo->Code ,Temp) ;
131 printf("Code:%s \r\n" ,Temp) ;
132 }
133
134 void Parse(char* str ,VTNCInfo* pVTInfo)
135 {
136 char* pPos = str ;
137 char Temp[32] ;
138 int nIndex = 0 ;
139 NCInfo* pInfo = NULL ;
140 while(*pPos != '\0')
141 {
142 if(*pPos == ',')
143 {
144 Temp[nIndex] = '\0' ;
145 ///< 解析一小段
146 pInfo = new NCInfo ;
147 memset(pInfo ,0 ,sizeof(NCInfo)) ;
148 ParseItem(Temp ,pInfo) ;
149 pVTInfo->push_back(pInfo) ;
150 nIndex = 0 ;
151 }else
152 {
153 Temp[nIndex] = *pPos ;
154 nIndex ++ ;
155 }
156 pPos++ ;
157 }
158 if(nIndex != 0)
159 {
160 Temp[nIndex] = '\0' ;
161 pInfo = new NCInfo ;
162 memset(pInfo ,0 ,sizeof(NCInfo)) ;
163 ParseItem(Temp ,pInfo) ;
164 pVTInfo->push_back(pInfo) ;
165 }
166 }
167
168 void ProcReplyContent(char* pSrc ,char* pSaveBuffer)
169 {
170 char* pPos = NULL ;
171 char* pEndPos = NULL ;
172 pPos = strstr(pSrc ,pStartToken) ;
173 if(pPos == NULL)
174 return ;
175 pPos += strlen(pStartToken) ;
176 pEndPos = strstr(pPos ,pEndToken) ;
177 if(pEndPos == NULL)
178 return ;
179
180 strncpy(pSaveBuffer ,pPos ,pEndPos - pPos) ;
181 pSaveBuffer[pEndPos - pPos] = '\0' ;
182 }
183
184 ///< 解析省级代码,获取城市列表代码
185 void ParseSLCityInfo(SLInfo* pSLInfo ,VTNCInfo* pCityList)
186 {
187 char RequestURL[128] ={0} ;
188 char Buf[2048+1] ;
189 char Content[1024] ;
190 int nResultLen = 0 ;
191
192 if(pSLInfo == NULL || pCityList == NULL)
193 {
194 return ;
195 }
196 ///< 生成请求URL
197 sprintf(RequestURL ,"%scity%s.xml?level=2" ,g_strCityListURL ,pSLInfo->Code) ;
198 if(InetDownFile(RequestURL ,(unsigned char*)Buf ,2048 ,&nResultLen) != TASK_DOWN_OK){
199 return ;
200 }
201 Buf[nResultLen] ='\0' ;
202 Parse(Buf ,pCityList) ;
203 }
204
205 ///< 根据省一级的代码获取,所有城市列表
206 void ParseSLInfo(VTNCInfo* pVTSLList ,VTSLInfo* pVTSLInfoList)
207 {
208 VTNCInfoIT IT ;
209 SLInfo* pSLInfo = NULL ;
210 NCInfo* pNCInfo = NULL ;
211 int nIndex = 0 ;
212
213 for(IT = pVTSLList->begin() ; IT != pVTSLList->end() ; IT++)
214 {
215 pNCInfo = *IT ;
216 if(pNCInfo == NULL)
217 continue ;
218
219 pSLInfo = new SLInfo ;
220 strcpy(pSLInfo->Name ,pNCInfo->Name) ;
221 strcpy(pSLInfo->Code ,pNCInfo->Code) ;
222 Sleep(1000) ;
223 ParseSLCityInfo(pSLInfo ,&pSLInfo->VTCityInfo) ;
224
225 pVTSLInfoList->push_back(pSLInfo) ;
226 nIndex ++ ;
227 }
228 }
229
230 ///< 写入XML文件头
231 void WriteXMLHeader(FILE* fp)
232 {
233 fwrite(g_strXMLHeader ,1 ,strlen(g_strXMLHeader) ,fp) ;
234 fwrite(g_strRootTag ,1 ,strlen(g_strRootTag) ,fp) ;
235 }
236
/// Writes the closing root tag to fp.
void WriteXMLTail(FILE* fp)
{
    fputs("</list>\r\n" ,fp) ;
}
242
243 ///< 写入一个省、城市列表信息
244 void WriteXMLSLItem(FILE* fp ,SLInfo* pSLInfo)
245 {
246 VTNCInfoIT itt ;
247 NCInfo* pNCInfo = NULL ;
248 fprintf(fp ,"\t<sl>\r\n") ;
249 fprintf(fp ,"\t\t<name>%s</name>\r\n" ,pSLInfo->Name) ;
250 fprintf(fp ,"\t\t<code>%s</code>\r\n" ,pSLInfo->Code) ;
251 fprintf(fp ,"\t\t<city>\r\n") ;
252 for(itt = pSLInfo->VTCityInfo.begin() ; itt != pSLInfo->VTCityInfo.end() ; itt++)
253 {
254 pNCInfo = *itt ;
255 if(pNCInfo == NULL)
256 continue ;
257 fprintf(fp ,"\t\t\t<item name=\"%s\" code=\"%s\" />\r\n" ,pNCInfo->Name ,pNCInfo->Code) ;
258 }
259 fprintf(fp ,"\t\t</city>\r\n") ;
260 fprintf(fp ,"\t</sl>\r\n") ;
261
262 }
263
264 ///< 将结果保存至文件
265 void SaveToFile(char* strDstFile ,VTSLInfo* pSLInfoVT)
266 {
267 FILE* pSaveFile = NULL ;
268 VTSLInfoIT itt ;
269 SLInfo* pInfo = NULL ;
270 pSaveFile = fopen(strDstFile ,"wb+") ;
271 WriteXMLHeader(pSaveFile) ;
272 for(itt = pSLInfoVT->begin() ; itt != pSLInfoVT->end() ; itt++)
273 {
274 pInfo = *itt ;
275 if(pInfo == NULL)
276 continue ;
277 WriteXMLSLItem(pSaveFile ,pInfo) ;
278 }
279 WriteXMLTail(pSaveFile) ;
280 fclose(pSaveFile) ;
281 }
282
283 void FreeNCVt(VTNCInfo* pVTInfo)
284 {
285 VTNCInfoIT it ;
286 NCInfo* pInfo = NULL ;
287 for(it = pVTInfo->begin() ; it != pVTInfo->end() ; it++)
288 {
289 pInfo = *it ;
290 if(pInfo != NULL)
291 {
292 delete pInfo ;
293 }
294 }
295 pVTInfo->clear() ;
296 }
297
298 int main()
299 {
300 FILE* fp = NULL ;
301 char strSLFile[128] ;
302 char *pBuffer = new char[1024*4+1] ;
303 int nReadLen = 0 ;
304 VTNCInfo VTInfo ;
305 VTSLInfo VSLInfo ;
306
307 if(InetDownFile(g_strSLURL ,(unsigned char*)pBuffer ,1024*4 ,&nReadLen) != TASK_DOWN_OK){
308 printf("下载省级列表失败 \r\n") ;
309 return 1 ;
310 }
311 pBuffer[nReadLen] = '\0' ;
312
313 ///< 解析省级城市和代码
314 Parse(pBuffer ,&VTInfo) ;
315
316 ///< 开始处理省级文件
317 ParseSLInfo(&VTInfo ,&VSLInfo) ;
318
319 ///< 写入文件
320 printf("开始将文件存入c:\\CityList.xml\r\n") ;
321 SaveToFile("c:\\CityList.xml" ,&VSLInfo) ;
322 delete[] pBuffer ;
323
324 VTSLInfoIT it ;
325 SLInfo* pSLInfo = NULL ;
326 for(it = VSLInfo.begin() ; it != VSLInfo.end() ; it ++)
327 {
328 pSLInfo = *it ;
329 if(pSLInfo == NULL)
330 continue ;
331 FreeNCVt(&pSLInfo->VTCityInfo) ;
332 delete pSLInfo ;
333 }
334
335 FreeNCVt(&VTInfo) ;
336
337 printf("所有处理已完成,按任意键结束。") ;
338 getchar() ;
339 return 0 ;
340 }