UTF-8 格式的 XML 指令存储在标准的 std::string 中,怎样把这个 string 转换成普通的多字节(本地编码)string?
UTF-8 编码中一个汉字占 3 个字节或以上,而普通的多字节编码(如 GBK)并不是这样表示的,因此不能直接按字节照搬,需要先解码成 Unicode 再转换。
#include <stdio.h>
#include <string.h>
// Decode the single UTF-8 encoded character that starts at byte[index] into
// its Unicode code point.
//
// Parameters:
//   byte    - input sequence, one UTF-8 byte per array element.
//   index   - position of the lead byte of the character to decode.
//   count   - number of valid elements in `byte`.
//   unicode - receives the decoded code point on success; it is left
//             untouched when the function fails.
//
// Returns true on success. Returns false when `index` is outside
// [0, count), when the lead byte is not a valid UTF-8 lead byte, when the
// sequence is truncated by the end of the input, or when any trailing byte
// is not a continuation byte of the form 10xxxxxx.
//
// The legacy 5- and 6-byte forms are still accepted so that existing callers
// keep working; overlong encodings and surrogate values are not rejected.
bool utf82unicode(unsigned int byte[], int index, int count, int& unicode)
{
    if (index < 0 || index >= count) {
        return false;
    }

    const unsigned int lead = byte[index];
    unsigned int value;  // payload bits accumulated so far
    int trailing;        // number of continuation bytes that must follow

    if ((lead & 0x80) == 0x00) {         // 0xxxxxxx: 1 byte
        trailing = 0;
        value = lead;
    } else if ((lead & 0xE0) == 0xC0) {  // 110xxxxx: 2 bytes
        trailing = 1;
        value = lead & 0x1F;
    } else if ((lead & 0xF0) == 0xE0) {  // 1110xxxx: 3 bytes
        trailing = 2;
        value = lead & 0x0F;
    } else if ((lead & 0xF8) == 0xF0) {  // 11110xxx: 4 bytes
        trailing = 3;
        value = lead & 0x07;
    } else if ((lead & 0xFC) == 0xF8) {  // 111110xx: 5 bytes (legacy form)
        trailing = 4;
        value = lead & 0x03;
    } else if ((lead & 0xFE) == 0xFC) {  // 1111110x: 6 bytes (legacy form)
        trailing = 5;
        value = lead & 0x01;
    } else {
        // A stray continuation byte (10xxxxxx) or 0xFE/0xFF cannot start
        // a character.
        return false;
    }

    // Written as a subtraction so the check cannot overflow even when
    // `count` is close to INT_MAX (index is already known to be in range).
    if (count - index <= trailing) {
        return false;
    }

    for (int i = 1; i <= trailing; ++i) {
        const unsigned int cont = byte[index + i];
        if ((cont & 0xC0) != 0x80) {
            return false;  // not a continuation byte: malformed sequence
        }
        // Each continuation byte contributes its low 6 bits.
        value = (value << 6) | (cont & 0x3F);
    }

    // At most 31 payload bits are ever accumulated, so this fits in an int.
    unicode = (int)value;
    return true;
}
然后再把得到的 Unicode 码点转换为多字节编码。