1 #include "VAUTF8.h"
2 #include <windows.h>
3
// Global switch consulted by the converters in this file: when true they
// transcode through UTF-16; when false Str2UTF8/UTF82Str copy bytes unchanged.
bool UTF8_Unicode_Possible = true;

// The three-byte UTF-8 byte-order mark (EF BB BF) followed by a terminating NUL.
char cUTF8Hdr[] = "\xEF\xBB\xBF";
6
// Classifies a byte as the first byte of a UTF-8 sequence.
//
// in      - the byte to inspect.
// returns - the total number of bytes (1..4) in the sequence that starts with
//           `in`, or -1 when `in` cannot start a well-formed sequence:
//           continuation bytes (0x80-0xBF), the overlong lead bytes 0xC0/0xC1,
//           and 0xF5-0xFF, which would encode values above U+10FFFF or are
//           never used by UTF-8 at all.
int UTF8CharLen(char in)
{
    // Work on the unsigned value so bytes >= 0x80 compare as 128..255.
    const unsigned char lead = static_cast<unsigned char>(in);

    if (lead < 0x80)
        return 1;   // plain ASCII

    if (lead < 0xC2)
        return -1;  // continuation byte, or overlong 2-byte lead (C0/C1)

    if (lead < 0xE0)
        return 2;

    if (lead < 0xF0)
        return 3;

    if (lead < 0xF5)
        return 4;

    return -1;      // 0xF5..0xFF never start a valid sequence
}
37 int IsUTF8(constchar* src, size_t max_source_len)
38 {
39 if (max_source_len <0)
40 return0;
41
42 if (max_source_len ==0)
43 return1;
44
45 while (*src && max_source_len--)
46 {
47 int bytes = UTF8CharLen(*src++);
48 if (bytes <0)
49 return0;
50 if (static_cast<int>(max_source_len) <--bytes)
51 return0;
52 while (bytes--) {
53 if ((*src++&0xC0) !=0x80)
54 return0;
55 }
56 }
57
58 return1;
59 }
60
61 //===================================================================================================
// Convert a wide-character (UTF-16) string to UTF-8
63 int _stdcall WStr2UTF8(constchar* source, char** dest)
64 {
65 int len =1;
66
67 if (source)
68 len = WStr2UTF8(source, NULL, 0);
69
70 *dest = (char*)malloc(len);
71
72 if (!source) {
73 *dest =0;
74 return1;
75 }
76
77 return WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)source, -1,
78 *dest, len, NULL, NULL);
79 }
80
81 int _stdcall WStr2UTF8(const wchar_t* source, char** dest)
82 {
83 return WStr2UTF8((char*)source, dest);
84 }
85
86 int _stdcall WStr2UTF8(constchar* source, char* dest, int max_len)
87 {
88 if (dest) {
89 if (source!=dest) {
90 return WideCharToMultiByte(CP_UTF8, 0,
91 (LPCWSTR)source, -1, dest, max_len, NULL, NULL);
92 } else {
93 int dest_size = WStr2UTF8(source, NULL, 0);
94
95 char* cTemp = NULL;
96 WStr2UTF8(source, &cTemp);
97 strcpy_s(dest, max_len, cTemp);
98 free(cTemp);
99
100 return dest_size;
101 }
102 } else {
103 return WideCharToMultiByte(CP_UTF8,0,(LPCWSTR)source,-1,NULL,0,NULL,NULL);
104 }
105
106 return0;
107 }
108 //===================================================================================================
109
110 //===================================================================================================
// Convert a narrow (multi-byte, ANSI code page) string to a wide-character string
112 int _stdcall Str2WStr(constchar* source, char* dest, int max_len)
113 {
114 if (!source)
115 {
116 memset(dest, 0, 2);
117 return2;
118 }
119 size_t source_len =1+ strlen(source);
120
121 if (source!=dest)
122 {
123 if (!dest)
124 return2* MultiByteToWideChar(CP_THREAD_ACP, 0, source, -1, NULL, 0);
125
126
127 return2*MultiByteToWideChar(CP_THREAD_ACP,0,source,-1,(LPWSTR)dest,max_len/2);
128 }
129 else
130 {
131 char* cTemp =newchar[2* source_len];
132 int i =2*MultiByteToWideChar(CP_THREAD_ACP,0,source,-1,(LPWSTR)cTemp,max_len/2);
133 memcpy(dest, cTemp, i);
134 delete[] cTemp;
135 return i;
136 }
137 }
138
139 int _stdcall Str2WStr(constchar* source, char** dest)
140 {
141 if (!source)
142 {
143 *dest =newchar[2];
144 memset(*dest, 0, 2);
145 return2;
146 }
147 int dest_len = Str2WStr(source, NULL, 0);
148 *dest = (char*)calloc(1, dest_len);
149 return2*MultiByteToWideChar(CP_THREAD_ACP,0,source,-1,(LPWSTR)*dest,dest_len/2);
150 }
151 //===================================================================================================
152
153
154 //===================================================================================================
// Convert a wide-character string to a narrow (multi-byte, ANSI code page) string
156 int _stdcall WStr2Str(constchar* source, char* dest, int max_len)
157 {
158 int len = WideCharToMultiByte(CP_THREAD_ACP, 0, (LPCWSTR)source, -1,
159 (LPSTR)dest, max_len, NULL, NULL);
160 return len;
161 }
162
163 int _stdcall WStr2Str(constchar* source, char** dest)
164 {
165 int len =1;
166 if (source)
167 len = WideCharToMultiByte(CP_THREAD_ACP,0,(LPCWSTR)source,-1,NULL,0,0,0);
168 *dest = (char*)malloc(len);
169 return WideCharToMultiByte(CP_THREAD_ACP, 0, (LPCWSTR)source,
170 -1, *dest, len, 0, 0);
171 }
172 //===================================================================================================
173
174
175 //===================================================================================================
// Convert a narrow (multi-byte, ANSI code page) string to a UTF-8 string
177 int _stdcall Str2UTF8(constchar* source, char* dest, int max_len)
178 {
179 if (!source)
180 {
181 *dest =0;
182 return1;
183 }
184
185 if (max_len <0)
186 return0;
187
188 int temp_size;
189 size_t source_len = strlen(source) +1;
190 if (UTF8_Unicode_Possible)
191 {
192 temp_size = Str2WStr(source, (char*)NULL);
193 } else {
194 temp_size =1+(int)strlen(source);
195 }
196 int i;
197
198 unsigned short* temp =new unsigned short[temp_size];
199
200 if (UTF8_Unicode_Possible) {
201 ZeroMemory(temp,sizeof(unsigned short) * temp_size);
202
203 if (dest) {
204 MultiByteToWideChar(CP_THREAD_ACP,0,source,-1,(LPWSTR)temp,temp_size);
205 i = WideCharToMultiByte(CP_UTF8,0,(LPCWSTR)temp,-1,dest,max_len,0,0);
206 delete[] temp;
207 return i;
208 } else {
209 MultiByteToWideChar(CP_THREAD_ACP,0,source,-1,(LPWSTR)temp,temp_size);
210 i = WideCharToMultiByte(CP_UTF8,0,(LPCWSTR)temp,-1,0,0,0,0);
211 delete[] temp;
212 return i;
213 }
214 } else {
215 delete[] temp;
216 if (dest) {
217 if ((int)source_len < max_len)
218 strcpy_s(dest, max_len, source);
219 else {
220 strncpy_s(dest, max_len, source, max_len);
221 dest[(int)max_len-1] =0;
222 }
223 }
224 return1+(int)strlen(source);
225 }
226
227 }
228
229 int _stdcall Str2UTF8(constchar* source, char** dest)
230 {
231 if (!dest)
232 return-1;
233
234 if (!source) {
235 *dest = (char*)calloc(1, 1);
236 return1;
237 }
238
239 if (UTF8_Unicode_Possible) {
240 unsigned short* temp = NULL;
241 Str2WStr(source, (char**)&temp);
242 int result = WStr2UTF8((char*)temp, dest);
243 free(temp);
244 return result;
245 } else {
246 *dest = _strdup(source);
247 return (int)(1+strlen(source));
248 }
249 }
250 //===================================================================================================
251
252
253
254 //===================================================================================================
// Convert a UTF-8 string to a narrow (multi-byte, ANSI code page) string
256 int _stdcall UTF82Str(constchar* source, char** dest)
257 {
258 if (!dest) {
259 return-1;
260 }
261
262 if (!source) {
263 *dest = (char*)calloc(1, 1);
264 return1;
265 }
266
267 unsigned short* temp = NULL;
268
269 if (UTF8_Unicode_Possible) {
270 UTF82WStr(source,(char**)&temp);
271 int dest_len = WideCharToMultiByte(CP_THREAD_ACP,0,(LPCWSTR)temp,-1,0,0,0,0);
272
273 if (dest) {
274 *dest = (char*)calloc(1, dest_len);
275 int r = WideCharToMultiByte(CP_THREAD_ACP,0,(LPCWSTR)temp,-1,*dest,dest_len,0,0);
276 free(temp);
277 return r;
278 } else {
279 int r = WideCharToMultiByte(CP_THREAD_ACP,0,(LPCWSTR)temp,-1,0,0,0,0);
280 free(temp);
281 return r;
282 }
283 } else {
284 *dest = _strdup(source);
285 return (int)strlen(*dest)+1;
286 }
287 }
288
289 int _stdcall UTF82Str(constchar* source, char* dest, int max_len)
290 {
291 int i;
292
293 if (!source) {
294 if (dest)
295 *dest =0;
296 return1;
297 }
298
299 unsigned short* temp = NULL;
300
301 if (UTF8_Unicode_Possible) {
302 UTF82WStr(source, (char**)&temp);
303 if (dest) {
304 i = WideCharToMultiByte(CP_THREAD_ACP,0,(LPCWSTR)temp,-1,dest,max_len,0,0);
305 delete[] temp;
306 return i;
307 } else {
308 i = WideCharToMultiByte(CP_THREAD_ACP,0,(LPCWSTR)temp,-1,0,0,0,0);
309 delete[] temp;
310 return i;
311 }
312 } else {
313 delete[] temp;
314 if (dest)
315 strcpy_s(dest, max_len, source);
316
317 return (int)strlen(source);
318 }
319 }
320 //===================================================================================================
321
322 //===================================================================================================
// Convert a UTF-8 string to a wide-character string
324 int _stdcall UTF82WStr(constchar* source, char** dest)
325 {
326 size_t source_len = strlen(source) +1;
327 int dest_len =2;
328
329 if (source)
330 dest_len =2* MultiByteToWideChar(CP_UTF8, 0, source, -1, 0, 0);
331
332 if (dest) {
333 *dest = (char*)malloc(dest_len);
334 returnsizeof(wchar_t)*MultiByteToWideChar(CP_UTF8, 0, source, -1,
335 (LPWSTR)*dest, dest_len /sizeof(wchar_t));
336 } else {
337 returnsizeof(wchar_t)*MultiByteToWideChar(CP_UTF8, 0, source, -1, 0, 0);
338 }
339 }
340
341
342 int _stdcall UTF82WStr(constchar* source, char* dest, int max_len)
343 {
344 int i;
345
346 if (!source)
347 return0;
348
349 size_t source_len = strlen(source) +1;
350
351 if (dest) {
352 if (source!=dest) {
353 returnsizeof(wchar_t) * MultiByteToWideChar(CP_UTF8, 0, source, -1,
354 (LPWSTR)dest, max_len /sizeof(wchar_t));
355 } else {
356 char* cTemp = (char*)malloc(UTF82WStr(source, NULL, 0));
357 i =sizeof(wchar_t) * MultiByteToWideChar(CP_UTF8, 0, source,
358 -1, (LPWSTR)cTemp, max_len /sizeof(wchar_t));
359 memcpy(dest, cTemp, i);
360 free(cTemp);
361 return i;
362 }
363 } else {
364 return2*MultiByteToWideChar(CP_UTF8,0,source,-1,0,0);
365 }
366 }
367
368 //===================================================================================================
369
370
371 int StringConvert( constchar* source, nsVAUTF8::eCharacterEncodingMode source_format,/* int max_source_len,*/char** dest, nsVAUTF8::eCharacterEncodingMode dest_format )
372 {
373 char* _source = (char*)source;
374 switch (source_format)
375 {
376 case nsVAUTF8::ANSI:
377 switch (dest_format) {
378 case nsVAUTF8::ANSI: *dest = _strdup(_source); break;
379 case nsVAUTF8::UTF8: Str2UTF8(_source, dest); break;
380 case nsVAUTF8::UTF16LE: Str2WStr(_source, dest); break;
381 }
382 break;
383 case nsVAUTF8::UTF8:
384 switch (dest_format) {
385 case nsVAUTF8::ANSI: UTF82Str(_source, dest); break;
386 case nsVAUTF8::UTF8: *dest = _strdup(_source); break;
387 case nsVAUTF8::UTF16LE: UTF82WStr(_source, dest); break;
388 }
389 break;
390 case nsVAUTF8::UTF16LE:
391 switch (dest_format) {
392 case nsVAUTF8::ANSI:
393 WStr2Str(_source, dest);
394 break;
395 case nsVAUTF8::UTF8:
396 WStr2UTF8(_source, dest);
397 break;
398 case nsVAUTF8::UTF16LE:
399 *dest = (char*)_wcsdup((wchar_t*)_source);
400 break;
401 }
402 break;
403 }
404 return1;
405 }
406
407 int FromUTF8(constchar* source, wchar_t** dest)
408 {
409 return StringConvert(source, nsVAUTF8::UTF8,
410 (char**)dest, nsVAUTF8::UTF16LE);
411 }
412
413 int FromUTF8(constchar* source, char** dest)
414 {
415 return StringConvert(source, nsVAUTF8::UTF8,
416 (char**)dest, nsVAUTF8::ANSI);
417 }
418
419 int ToUTF8(constchar* source, char** dest)
420 {
421 return StringConvert(source, nsVAUTF8::ANSI,
422 (char**)dest, nsVAUTF8::UTF8);
423 }
424
425 int ToUTF8(const wchar_t* source, char** dest)
426 {
427 return StringConvert((char*)source, nsVAUTF8::UTF16LE,
428 (char**)dest, nsVAUTF8::UTF8);
429 }
430
431 void utf8_EnableRealUnicode( bool bEnabled )
432 {
433 UTF8_Unicode_Possible = bEnabled;
434 }
435
436 bool utf8_IsUnicodeEnabled()
437 {
438 return UTF8_Unicode_Possible;
439 }
440 VAUTF8::VAUTF8( constchar* pSrc, int Encoding )
441 {
442 if (pSrc)
443 {
444 if (Encoding == nsVAUTF8::UTF8)
445 {
446 m_sUTF8 = pSrc;
447 }
448 else
449 {
450 m_sANSI = pSrc;
451 }
452
453 Complete();
454 }
455
456 }
457
458
459 VAUTF8::VAUTF8( constchar* pSrc )
460 {
461 if (pSrc)
462 {
463 if (IsUTF8(pSrc, strlen(pSrc)))
464 {
465 m_sUTF8 = pSrc;
466 }
467 else
468 {
469 m_sANSI = pSrc;
470 }
471
472 Complete();
473 }
474 }
475
476 VAUTF8::VAUTF8( const wchar_t* pSrc )
477 {
478 if (pSrc)
479 {
480 m_sUNICODE = pSrc;
481 Complete();
482 }
483 }
484
485 VAUTF8::VAUTF8( const EncodingStirngA& src )
486 {
487 if (IsUTF8(src.c_str(), src.size()))
488 {
489 m_sUTF8 = src;
490 }
491 else
492 {
493 m_sANSI = src;
494 }
495
496 Complete();
497 }
498
499
500 VAUTF8::VAUTF8( const EncodingStirngW& src )
501 {
502 m_sUNICODE = src;
503 Complete();
504 }
505
506 VAUTF8::VAUTF8( const VAUTF8& other )
507 {
508 *this= other;
509 }
510
511
512 VAUTF8& VAUTF8::operator=(const VAUTF8& rhs )
513 {
514 m_sUTF8 = rhs.m_sUTF8;
515 Complete();
516 return*this;
517 }
518
519 void VAUTF8::Complete()
520 {
521 char* p = NULL;
522
523 if (!m_sANSI.empty())
524 {
525 Str2UTF8(m_sANSI.c_str(), &p);
526 m_sUTF8 = p;
527 free(p);
528
529 Str2WStr(m_sANSI.c_str(), &p);
530 m_sUNICODE = (wchar_t*)p;
531 free(p);
532 }
533 else
534 {
535 if (!m_sUTF8.empty())
536 {
537 UTF82Str((char*)m_sUTF8.c_str(), &p);
538 m_sANSI = p;
539 free(p);
540
541 UTF82WStr((char*)m_sUTF8.c_str(), &p);
542 m_sUNICODE = (wchar_t*)p;
543 free(p);
544 }
545 else
546 {
547 if (!m_sUNICODE.empty())
548 {
549 WStr2Str((char*)m_sUNICODE.c_str(), &p);
550 m_sANSI = p;
551 free(p);
552
553 WStr2UTF8((char*)m_sUNICODE.c_str(), &p);
554 m_sUTF8 = p;
555 free(p);
556 }
557 }
558 }
559 }
560