有没有比较好的utf8ios unicode 转码中文转码函数

点击联系发帖人 时间：2016-02-17 06:15

http unicode转码

c中实现utf8和gbk的互转 -
- ITeye技术网站
博客分类：
#include <iconv.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
/**
 * Convert a byte buffer from one character set to another with iconv(3).
 *
 * @param from_charset  iconv name of the source encoding (e.g. "utf-8").
 * @param to_charset    iconv name of the target encoding (e.g. "gb2312").
 * @param inbuf         Bytes to convert.
 * @param inlen         Number of bytes in inbuf to convert.
 * @param outbuf        Destination buffer; it is zero-filled before conversion.
 * @param outlen        Size of outbuf in bytes. One byte is reserved for the
 *                      terminating NUL, so at most outlen - 1 bytes are produced.
 * @return 0 on success; -1 if an argument is invalid, the conversion is not
 *         supported, the input is malformed, or outbuf is too small.
 */
int code_convert(const char *from_charset, const char *to_charset, char *inbuf, size_t inlen,
                 char *outbuf, size_t outlen) {
    if (from_charset == NULL || to_charset == NULL || inbuf == NULL ||
        outbuf == NULL || outlen == 0)
        return -1;

    /* iconv_open() reports failure as (iconv_t)-1, not as a null descriptor. */
    iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t) -1)
        return -1;

    memset(outbuf, 0, outlen);

    /* iconv() advances these cursors; the caller's pointers stay untouched. */
    char *in = inbuf;
    char *out = outbuf;
    size_t outleft = outlen - 1; /* keep one byte for the terminator */

    size_t rc = iconv(cd, &in, &inlen, &out, &outleft);

    /* Release the descriptor on every path, including conversion failure. */
    iconv_close(cd);

    if (rc == (size_t) -1)
        return -1;

    *out = '\0'; /* terminate right after the last converted byte */
    return 0;
}
/**
 * Convert UTF-8 text to GB2312 by delegating to code_convert().
 *
 * @param inbuf   UTF-8 input bytes.
 * @param inlen   Number of input bytes.
 * @param outbuf  Destination buffer for the GB2312 bytes.
 * @param outlen  Size of outbuf in bytes.
 * @return Whatever code_convert() returns.
 *
 * NOTE(review): the target is "gb2312", not "gbk"; characters that exist only
 * in GBK presumably fail to convert -- confirm whether "gbk" was intended.
 */
int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
    return code_convert("utf-8", "gb2312", inbuf, inlen, outbuf, outlen);
}
/**
 * Convert GB2312 text to UTF-8 by delegating to code_convert().
 *
 * @param inbuf   GB2312 input bytes.
 * @param inlen   Number of input bytes.
 * @param outbuf  Destination buffer for the UTF-8 bytes.
 * @param outlen  Size of outbuf in bytes.
 * @return Whatever code_convert() returns.
 */
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
    return code_convert("gb2312", "utf-8", inbuf, inlen, outbuf, outlen);
}
/**
 * Demo: convert "中国" from UTF-8 to GB2312 and write it to test.txt, then
 * convert it back to UTF-8 and write that to test.txt2.
 *
 * @return 0 on success, 1 if a file cannot be opened or a conversion fails.
 */
int main(void) {
    char s[] = "中国"; /* writable array: u2g() takes a non-const char * */
    char buf[10];
    char buf2[10];

    /* O_TRUNC so that bytes from an earlier, longer run do not survive. */
    int fd = open("test.txt", O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd == -1) {
        perror("test.txt");
        return 1;
    }
    if (u2g(s, strlen(s), buf, sizeof(buf)) < 0) {
        fprintf(stderr, "utf-8 -> gb2312 conversion failed\n");
        close(fd);
        return 1;
    }
    if (write(fd, buf, strlen(buf)) == -1)
        perror("write test.txt");
    close(fd);

    fd = open("test.txt2", O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd == -1) {
        perror("test.txt2");
        return 1;
    }
    if (g2u(buf, strlen(buf), buf2, sizeof(buf2)) < 0) {
        fprintf(stderr, "gb2312 -> utf-8 conversion failed\n");
        close(fd);
        return 1;
    }
    if (write(fd, buf2, strlen(buf2)) == -1)
        perror("write test.txt2");
    close(fd);

    return 0;
}
上面是使用iconv函数。
方式二：使用如下两个函数
mbstowcs将多字节编码转换为宽字节编码
wcstombs将宽字节编码转换为多字节编码
注意，需要系统编码的支持，可以通过locale -a 查看系统支持的。若不支持zh_CN.gbk, 需要安装，例如，在ubuntu上的安装步骤如下：
$sudo vi /var/lib/locales/supported.d/zh-hans
zh_CN.UTF-8 UTF-8
zh_SG.UTF-8 UTF-8
zh_CN.GBK GBK
zh_CN.GB18030 GB18030
$ sudo locale-gen
$ locale -a
zh_CN.gb18030
zh_CN.utf8
zh_SG.utf8
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <locale.h>
/*
 * DESCRIPTION: Convert a UTF-8 string to GBK by going through wchar_t, using
 *              the "zh_CN.utf8" and "zh_CN.gbk" locales with mbstowcs/wcstombs.
 * Input:   gbkStr        buffer that receives the converted string;
 *          srcStr        NUL-terminated string to convert;
 *          maxGbkStrlen  size of gbkStr in bytes (the result plus its
 *                        terminator must fit).
 * Output:  gbkStr
 * Returns: -1 on failure; >0 (length of the GBK string in bytes) on success.
 *
 * The process locale is switched during the conversion and restored to its
 * previous value before returning.
 */
int utf82gbk(char *gbkStr, const char *srcStr, int maxGbkStrlen) {
    if (NULL == srcStr || NULL == gbkStr || maxGbkStrlen <= 0) {
        printf("Bad Parameter\n");
        return -1;
    }

    int result = -1;
    wchar_t *unicodeStr = NULL;
    char *savedLocale = NULL;

    /* The string returned by setlocale() may be overwritten by later calls,
     * so keep a private copy in order to restore the locale afterwards. */
    const char *currentLocale = setlocale(LC_ALL, NULL);
    if (currentLocale != NULL) {
        size_t localeBytes = strlen(currentLocale) + 1;
        savedLocale = (char *) malloc(localeBytes);
        if (savedLocale != NULL)
            memcpy(savedLocale, currentLocale, localeBytes);
    }

    /* Single-pass block: any failure breaks out to the shared cleanup below. */
    do {
        /* Step 1: UTF-8 -> wide characters, decoded under a UTF-8 locale. */
        if (NULL == setlocale(LC_ALL, "zh_CN.utf8")) {
            printf("Bad Parameter\n");
            break;
        }
        size_t unicodeLen = mbstowcs(NULL, srcStr, 0); /* length after conversion */
        if (unicodeLen == (size_t) -1 || unicodeLen == 0) {
            printf("Can not Transfer!!!\n");
            break;
        }
        unicodeStr = (wchar_t *) calloc(unicodeLen + 1, sizeof(wchar_t));
        if (NULL == unicodeStr) {
            printf("Can not Transfer!!!\n");
            break;
        }
        /* The limit is counted in wide characters, not in source bytes. */
        mbstowcs(unicodeStr, srcStr, unicodeLen + 1);

        /* Step 2: wide characters -> GBK, encoded under a GBK locale. */
        if (NULL == setlocale(LC_ALL, "zh_CN.gbk")) {
            printf("Bad Parameter\n");
            break;
        }
        size_t gbkLen = wcstombs(NULL, unicodeStr, 0); /* length after conversion */
        if (gbkLen == (size_t) -1 || gbkLen == 0) {
            printf("Can not Transfer!!!\n");
            break;
        }
        if (gbkLen >= (size_t) maxGbkStrlen) { /* is the destination large enough? */
            printf("Dst Str memory not enough\n");
            break;
        }
        wcstombs(gbkStr, unicodeStr, gbkLen);
        gbkStr[gbkLen] = 0; /* add the terminator */
        result = (int) gbkLen;
    } while (0);

    free(unicodeStr);
    if (savedLocale != NULL) {
        setlocale(LC_ALL, savedLocale);
        free(savedLocale);
    }
    return result;
}
/**
 * Demo: convert "中国" from UTF-8 to GBK with utf82gbk() and write the result
 * to test.txt.
 *
 * @return 0 on success, 1 if the conversion fails or the file cannot be opened.
 */
int main(void) {
    const char *s = "中国";
    char buf[10];

    int gbkLen = utf82gbk(buf, s, sizeof(buf));
    if (gbkLen < 0)
        return 1; /* utf82gbk() has already printed the reason */

    /* O_TRUNC so that bytes from an earlier, longer run do not survive. */
    int fd = open("test.txt", O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd == -1) {
        perror("test.txt");
        return 1;
    }
    if (write(fd, buf, (size_t) gbkLen) == -1)
        perror("write test.txt");
    close(fd);

    return 0;
}
浏览: 183176 次
来自: 杭州
nk_tocean 写道照着做了，但是不行啊，还是乱码.先确认 ...
照着做了，但是不行啊，还是乱码.
您好，能不能把语言包给我发过来，我找不到。谢谢 1790958 ...
修改配置路径到 JDK 安装目录下的 jre 亦可
搂主是正确的,刚刚招到原因,我自己写了一个serde,里面用了 ...推荐这篇日记的豆列
······共有 1944 人关注过本帖
标题：字符编码转换UTF8->UNICODE，UNICODE->UTF8
等　级：新手上路
帖　子：10
结帖率：66.67%
&&已结贴√
&&问题点数：10&&回复次数：7&&&
字符编码转换UTF8->UNICODE，UNICODE->UTF8
最近接触到字符编码，先搞一个程序实现：输入字符存入文件，然后把字符转换为UNICODE，再把UNICODE转换为UTF8。话说，这个是要用到位运算还是什么？冥想好久，没有任何思路，希望
有大神可以提示一下，打开思路，最好能详细讲，谢谢
搜索更多相关主题的帖子:
来　自：青藏高原
等　级：贵宾
威　望：52
帖　子：3489
专家分：11291
等　级：新手上路
帖　子：10
回复 2楼 love云彩
问了一天的度娘了……
等　级：本版版主
威　望：300
帖　子：25056
专家分：47570
位运算？开玩笑呢你！我看你连自己提到的两种编码是怎么回事都不大清楚吧，百度一天都没收获？
授人以渔，不授人以鱼。
等　级：新手上路
帖　子：10
回复 4楼 TonyDeng
这个确实不懂，就是仅仅在网上找资料了解一下。
等　级：本版版主
威　望：300
帖　子：25056
专家分：47570
&&得分:10&
C++库有专用转换函数的，不是数学运算那么简单，它与字符映射代码页有关，相同的编码，不同的代码页对应不同的字符，utf-8甚至是不定长的字符编码，而Unicode是定长的（还有Unicode-16和Unicode-32之分）。首先这不是什么位运算的问题，不过是最基本的查表法而已，但你没有完整的数据表就是白搭；其次不同的编码系统互相转换，不是一个运算法则就能搞定的，针对不同的编码，有不同的转换算法。别想这种编程了，有现成的转换库函数和API可用，你自己是搞不定的。
单是Unicode，在中文Windows下就有GB2312和Unicode-16的区别，这种系统默认的大陆编码是GB2312，转换成系统内部的Unicode-16根本就没有数学公式，完全是查表（还有很多Unicode-16没有对应的GB2312，更是无法转换，常见的？号乱码就是这种原因，故不是可以互相顛倒转换的），但系统报告给你的ANSI编码，就是GB2312。
没有绝对准确的办法从一篇未知编码的文档中识别是什么编码。
[ 本帖最后由 TonyDeng 于
11:07 编辑 ]
授人以渔，不授人以鱼。
来　自：广州
等　级：小飞侠
帖　子：1041
专家分：2730
http://blog.csdn.net/qq/article/details/7550195
想象力征服世界
等　级：论坛游民
帖　子：14
专家分：19
直接在DW里就可以修改
ｗww.jiajunyuanlin.coｍ&&ｗww.futegz.coｍjtiao.coｍ
版权所有，并保留所有权利。
Powered by , Processed in 0.029389 second(s), 8 queries.
Copyright&, BCCN.NET, All Rights Reserved下次自动登录
现在的位置:
> 综合 > 正文
ASCII,UTF-8,Unicode字符串相互转换
#include <windows.h>

#include <stdexcept>
#include <string>
#include <vector>
//utf8 转 Unicode
std::wstring Utf82Unicode(const std::string& utf8string)
int widesize = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, NULL, 0);
if (widesize == ERROR_NO_UNICODE_TRANSLATION)
throw std::exception("Invalid UTF-8 sequence.");
if (widesize == 0)
throw std::exception("Error in conversion.");
std::vector&wchar_t& resultstring(widesize);
int convresult = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, &resultstring[0], widesize);
if (convresult != widesize)
throw std::exception("La falla!");
return std::wstring(&resultstring[0]);
//unicode 转为 ascii
std::string WideByte2Acsi(std::wstring& wstrcode)
int asciisize = ::WideCharToMultiByte(CP_OEMCP, 0, wstrcode.c_str(), -1, NULL, 0, NULL, NULL);
if (asciisize == ERROR_NO_UNICODE_TRANSLATION)
throw std::exception("Invalid UTF-8 sequence.");
if (asciisize == 0)
throw std::exception("Error in conversion.");
std::vector&char& resultstring(asciisize);
int convresult =::WideCharToMultiByte(CP_OEMCP, 0, wstrcode.c_str(), -1, &resultstring[0], asciisize, NULL, NULL);
if (convresult != asciisize)
throw std::exception("La falla!");
return std::string(&resultstring[0]);
//utf-8 转 ascii
std::string UTF_82ASCII(std::string& strUtf8Code)
std::string strRet("");
//先把 utf8 转为 unicode
std::wstring wstr = Utf82Unicode(strUtf8Code);
//最后把 unicode 转为 ascii
strRet = WideByte2Acsi(wstr);
return strR
///////////////////////////////////////////////////////////////////////
//ascii 转 Unicode
std::wstring Acsi2WideByte(std::string& strascii)
int widesize = MultiByteToWideChar (CP_ACP, 0, (char*)strascii.c_str(), -1, NULL, 0);
if (widesize == ERROR_NO_UNICODE_TRANSLATION)
throw std::exception("Invalid UTF-8 sequence.");
if (widesize == 0)
throw std::exception("Error in conversion.");
std::vector&wchar_t& resultstring(widesize);
int convresult = MultiByteToWideChar (CP_ACP, 0, (char*)strascii.c_str(), -1, &resultstring[0], widesize);
if (convresult != widesize)
throw std::exception("La falla!");
return std::wstring(&resultstring[0]);
// Unicode (wide string) -> UTF-8.
//
// Converts a std::wstring to a UTF-8 std::string with
// WideCharToMultiByte(CP_UTF8). Input is treated as NUL-terminated.
//
// Throws std::runtime_error (derived from std::exception) when the
// conversion fails.
std::string Unicode2Utf8(const std::wstring& widestring)
{
    // First pass: query the required size in bytes, terminating NUL included.
    const int utf8size = ::WideCharToMultiByte(CP_UTF8, 0, widestring.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8size == 0)
    {
        throw std::runtime_error("Error in conversion.");
    }

    std::vector<char> resultstring(utf8size);

    // Second pass: perform the conversion into the sized buffer.
    const int convresult = ::WideCharToMultiByte(CP_UTF8, 0, widestring.c_str(), -1, &resultstring[0], utf8size, NULL, NULL);
    if (convresult != utf8size)
    {
        throw std::runtime_error("La falla!");
    }

    return std::string(&resultstring[0]);
}
//ascii 转 Utf8
std::string ASCII2UTF_8(std::string& strAsciiCode)
std::string strRet("");
//先把 ascii 转为 unicode
std::wstring wstr = Acsi2WideByte(strAsciiCode);
//最后把 unicode 转为 utf8
strRet = Unicode2Utf8(wstr);
return strR
&&&&推荐文章:
【上篇】【下篇】C# UTF8编码与 UNICODE编码互相转换函数
//UTF8编码与 UNICODE编码互相转换函数//////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <wchar.h>
#include <assert.h>
//////////////////////////////////////////////////////////////////////////
/*
 * Decode a NUL-terminated UTF-8 string into a newly allocated, NUL-terminated
 * wchar_t string.
 *
 * Handles 1- to 4-byte sequences. When wchar_t is 16 bits wide, code points
 * above U+FFFF are written as a surrogate pair. Any byte that does not start
 * a well-formed sequence (stray continuation byte, truncated or overlong
 * sequence, encoded surrogate, value above U+10FFFF) is replaced by U+FFFD
 * and decoding resumes at the next byte.
 *
 * putf8:   input string; must not be NULL (checked with assert).
 * Returns: malloc()ed buffer the caller must free(), or NULL if the
 *          allocation fails.
 */
wchar_t* ConvertUtf8ToUnicode(char* putf8)
{
    /* Smallest code point that legitimately needs 0..3 continuation bytes. */
    static const unsigned long min_cp[4] = { 0x0UL, 0x80UL, 0x800UL, 0x10000UL };

    assert(putf8);

    /* Work on unsigned bytes so that bytes >= 0x80 do not sign-extend. */
    const unsigned char* src = (const unsigned char*)putf8;
    const size_t len = strlen(putf8);

    /* Every output unit consumes at least one input byte (a 4-byte sequence
     * yields at most two units), so len units plus the terminator suffice. */
    wchar_t* result = (wchar_t*)malloc(sizeof(wchar_t) * (len + 1));
    if (NULL == result)
        return NULL;

    size_t i = 0;
    size_t k = 0;
    while (i < len)
    {
        const unsigned char lead = src[i];
        unsigned long cp = 0;
        size_t need = 0; /* number of continuation bytes expected */
        size_t j;
        int valid = 1;

        if (lead < 0x80)               { cp = lead;        need = 0; }
        else if (0xC0 == (lead & 0xE0)) { cp = lead & 0x1F; need = 1; }
        else if (0xE0 == (lead & 0xF0)) { cp = lead & 0x0F; need = 2; }
        else if (0xF0 == (lead & 0xF8)) { cp = lead & 0x07; need = 3; }
        else                            { valid = 0; } /* continuation or illegal lead byte */

        /* Do not read past the end of a truncated sequence. */
        if (valid && need > len - i - 1)
            valid = 0;

        for (j = 1; valid && j <= need; ++j)
        {
            if (0x80 != (src[i + j] & 0xC0))
                valid = 0;
            else
                cp = (cp << 6) | (unsigned long)(src[i + j] & 0x3F);
        }

        if (valid && (cp < min_cp[need] || cp > 0x10FFFFUL ||
                      (cp >= 0xD800UL && cp <= 0xDFFFUL)))
            valid = 0;

        if (!valid)
        {
            /* Always advance, otherwise a bad byte would loop forever. */
            result[k++] = (wchar_t)0xFFFD;
            i += 1;
            continue;
        }

        i += need + 1;
        if (cp > 0xFFFFUL && sizeof(wchar_t) == 2)
        {
            cp -= 0x10000UL;
            result[k++] = (wchar_t)(0xD800UL + (cp >> 10));
            result[k++] = (wchar_t)(0xDC00UL + (cp & 0x3FFUL));
        }
        else
        {
            result[k++] = (wchar_t)cp;
        }
    }
    result[k] = 0;

    return result;
}
/*
 * Encode a NUL-terminated wchar_t string as a newly allocated, NUL-terminated
 * UTF-8 string.
 *
 * Emits 1- to 4-byte sequences according to the code point value. A high
 * surrogate followed by a low surrogate is combined into one code point
 * before encoding; an unpaired surrogate or a value above U+10FFFF is
 * encoded as U+FFFD.
 *
 * punicode: input string; must not be NULL (checked with assert).
 * Returns:  malloc()ed buffer the caller must free(), or NULL if the
 *           allocation fails.
 */
char* ConvertUnicodeToUtf8(wchar_t* punicode)
{
    assert(punicode);

    const size_t len = wcslen(punicode);

    /* Worst case is 4 bytes per input unit, plus one byte for the terminator. */
    char* result = (char*)malloc(len * 4 + 1);
    if (NULL == result)
        return NULL;

    size_t k = 0;
    size_t i;
    for (i = 0; i < len; i++)
    {
        unsigned long cp = (unsigned long)punicode[i];
        if (sizeof(wchar_t) == 2)
            cp &= 0xFFFFUL; /* guard against sign extension of a 16-bit wchar_t */

        /* Combine a UTF-16 surrogate pair into a single code point. */
        if (cp >= 0xD800UL && cp <= 0xDBFFUL && i + 1 < len)
        {
            unsigned long low = (unsigned long)punicode[i + 1];
            if (sizeof(wchar_t) == 2)
                low &= 0xFFFFUL;
            if (low >= 0xDC00UL && low <= 0xDFFFUL)
            {
                cp = 0x10000UL + ((cp - 0xD800UL) << 10) + (low - 0xDC00UL);
                i++;
            }
        }

        /* Anything that is not a Unicode scalar value becomes U+FFFD. */
        if (cp > 0x10FFFFUL || (cp >= 0xD800UL && cp <= 0xDFFFUL))
            cp = 0xFFFDUL;

        if (cp < 0x80UL)
        {
            result[k++] = (char)cp;
        }
        else if (cp < 0x800UL)
        {
            result[k++] = (char)(0xC0 | (cp >> 6));
            result[k++] = (char)(0x80 | (cp & 0x3F));
        }
        else if (cp < 0x10000UL)
        {
            result[k++] = (char)(0xE0 | (cp >> 12));
            result[k++] = (char)(0x80 | ((cp >> 6) & 0x3F));
            result[k++] = (char)(0x80 | (cp & 0x3F));
        }
        else
        {
            result[k++] = (char)(0xF0 | (cp >> 18));
            result[k++] = (char)(0x80 | ((cp >> 12) & 0x3F));
            result[k++] = (char)(0x80 | ((cp >> 6) & 0x3F));
            result[k++] = (char)(0x80 | (cp & 0x3F));
        }
    }
    result[k] = 0;

    return result;
}
/* Site notice carried over from the source page: "Please obey the law and keep your language civil." */

奇偶密码网