Unicode和UTF-8之间的转换详解

zc qq:1337220912

问题：UTF-8 格式的 XML 指令存储在标准的 std::string 中，怎样把这个 string 转换成普通的多字节 string？
UTF-8 编码中一个汉字占 3 个或更多字节，而普通的多字节编码并不是这样表示的。

#include <stdio.h>
#include <string.h>

// Decode one UTF-8 encoded sequence into its Unicode code point value.
//
// Parameters:
//   byte    - input array; each element is treated as one UTF-8 byte.
//   index   - position in `byte` of the lead byte of the sequence to decode.
//   count   - number of valid elements in `byte`.
//   unicode - receives the decoded value on success; left untouched on failure.
//
// Returns true when a complete sequence starting at `index` was decoded, and
// false when `index` is out of range, the lead byte is not a valid lead byte,
// the sequence runs past `count`, or a continuation byte does not match the
// 10xxxxxx pattern.
//
// Besides the 1- to 4-byte forms, the legacy 5- and 6-byte forms are still
// accepted. Overlong encodings and surrogate values are not rejected.
bool utf82unicode(unsigned int byte[], int index, int count, int& unicode)
{
    if (index < 0 || index >= count) return false;

    const unsigned int lead = byte[index];
    unsigned int value;   // payload bits accumulated so far
    int trailing;         // number of continuation bytes expected

    if ((lead & 0x80) == 0x00)      { trailing = 0; value = lead; }        // 1 byte
    else if ((lead & 0xE0) == 0xC0) { trailing = 1; value = lead & 0x1F; } // 2 bytes
    else if ((lead & 0xF0) == 0xE0) { trailing = 2; value = lead & 0x0F; } // 3 bytes
    else if ((lead & 0xF8) == 0xF0) { trailing = 3; value = lead & 0x07; } // 4 bytes
    else if ((lead & 0xFC) == 0xF8) { trailing = 4; value = lead & 0x03; } // 5 bytes
    else if ((lead & 0xFE) == 0xFC) { trailing = 5; value = lead & 0x01; } // 6 bytes
    else return false;  // stray continuation byte, or 0xFE / 0xFF

    // Written as a subtraction so the check cannot overflow int; here
    // count - index is at least 1 because 0 <= index < count.
    if (count - index <= trailing) return false;

    for (int i = 1; i <= trailing; ++i)
    {
        const unsigned int next = byte[index + i];
        if ((next & 0xC0) != 0x80) return false;  // not a continuation byte
        value = (value << 6) | (next & 0x3F);
    }

    // The widest form carries 31 payload bits, so the value always fits in int.
    unicode = (int)value;
    return true;
}

然后再把得到的 Unicode 值转换成多字节编码。

posted on 2012-07-18 17:30 多彩人生 阅读(273) 评论(0) 编辑 收藏 引用

只有注册用户登录后才能发表评论。
【推荐】100%开源！大型工业跨平台软件C++源码提供，建模，组态！



网站导航: 博客园 IT新闻 BlogJava 博问 Chat2DB 管理

colorful

Unicode和UTF-8之间的转换详解

导航

统计

常用链接

留言簿(3)

随笔分类

随笔档案

搜索

最新评论

阅读排行榜

评论排行榜