VC中实现GB2312、BIG5、Unicode编码转换的方法
本文主要以实例形式讨论了VC编译环境下,实现字符串和文件编码方式转换的方法,在linux下请使用Strconv来实现。具体方法如下:
一、文件编码格式转换
// Convert a GB2312-encoded file into a UTF-16 little-endian ("Unicode") file.
// This fragment relies on names declared outside of it: file_handle, filenam,
// filenewname, str_buf_pool, str_unicode_buf_pool, unicode_little_file_header,
// numread, numwrite, nLen and POOL_BUFF_SIZE.
if((file_handle = fopen(filenam,"rb")) != NULL)
{
    // Read the source file as raw bytes. At most POOL_BUFF_SIZE bytes are
    // read; anything beyond that is not converted.
    numread = fread(str_buf_pool,sizeof(char),POOL_BUFF_SIZE,file_handle);
    fclose(file_handle);

    // fread() does not NUL-terminate the buffer, so the byte count is passed
    // explicitly instead of -1. This also keeps a terminating NUL out of the
    // output. Code page 936 is named explicitly so the result does not depend
    // on the system ANSI code page (CP_ACP).
    nLen = 0;
    if(numread > 0)
    {
        // With a zero-sized output buffer the call only reports the number of
        // wide characters required.
        nLen = MultiByteToWideChar(936,0,str_buf_pool,(int)numread,NULL,0);
    }
    if(nLen > 0)
    {
        // NOTE(review): assumes str_unicode_buf_pool has room for nLen wide
        // characters - confirm against its declaration.
        nLen = MultiByteToWideChar(936,0,str_buf_pool,(int)numread,(LPWSTR)str_unicode_buf_pool,nLen);
    }

    // Byte-order mark for a UTF-16 Little Endian file: 0xFF 0xFE.
    // (A UTF-16 Big Endian file starts with 0xFE 0xFF instead.)
    unicode_little_file_header[0]=0xFF;
    unicode_little_file_header[1]=0xFE;

    // Store the target file: BOM first, then the converted text.
    if((file_handle=fopen(filenewname,"wb+")) != NULL)
    {
        fwrite(unicode_little_file_header,sizeof(char),2,file_handle);
        // The element size is one wide character, not the size of a pointer.
        numwrite = fwrite(str_unicode_buf_pool,sizeof(wchar_t),nLen,file_handle);
        fclose(file_handle);
    }
}
二、字符串编码格式转换
//GB2312 转换成 Unicode:
wchar_t* GB2312ToUnicode(const char* szGBString)
{
UINT nCodePage = 936; //GB2312
int nLength=MultiByteToWideChar(nCodePage,0,szGBString,-1,NULL,0);
wchar_t* pBuffer = new wchar_t[nLength+1];
MultiByteToWideChar(nCodePage,0,szGBString,-1,pBuffer,nLength);
pBuffer[nLength]=0;
return pBuffer;
}
//BIG5 转换成 Unicode:
wchar_t* BIG5ToUnicode(const char* szBIG5String)
{unicode文件格式
UINT nCodePage = 950; //BIG5
int nLength=MultiByteToWideChar(nCodePage,0,szBIG5String,-1,NULL,0);
wchar_t* pBuffer = new wchar_t[nLength+1];
MultiByteToWideChar(nCodePage,0,szBIG5String,-1,pBuffer,nLength);
pBuffer[nLength]=0;
return pBuffer;
}
//Unicode 转换成 GB2312:
char* UnicodeToGB2312(const wchar_t* szUnicodeString)
{
UINT nCodePage = 936; //GB2312
int nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL);
char* pBuffer=new char[nLength+1];
WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL);
pBuffer[nLength]=0;
return pBuffer;
}
//Unicode 转换成 BIG5:
char* UnicodeToBIG5(const wchar_t* szUnicodeString)
{
UINT nCodePage = 950; //BIG5
int nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL);
char* pBuffer=new char[nLength+1];
WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL);
pBuffer[nLength]=0;
return pBuffer;
}
/
/繁体中⽂BIG5 转换成简体中⽂ GB2312
char* BIG5ToGB2312(const char* szBIG5String)
{
LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);
wchar_t* szUnicodeBuff = BIG5ToUnicode(szBIG5String);
char* szGB2312Buff = UnicodeToGB2312(szUnicodeBuff);
int nLength = LCMapString(lcid,LCMAP_SIMPLIFIED_CHINESE, szGB2312Buff,-1,NULL,0);
char* pBuffer = new char[nLength + 1];
LCMapString(0x0804,LCMAP_SIMPLIFIED_CHINESE,szGB2312Buff,-1,pBuffer,nLength);
pBuffer[nLength] = 0;
delete[] szUnicodeBuff;
delete[] szGB2312Buff;
return pBuffer;
}
//简体中⽂ GB2312 转换成繁体中⽂BIG5
char* GB2312ToBIG5(const char* szGBString)
{
LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);
int nLength = LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,NULL,0);
char* pBuffer=new char[nLength+1];
LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,pBuffer,nLength);
pBuffer[nLength]=0;
wchar_t* pUnicodeBuff = GB2312ToUnicode(pBuffer);
char* pBIG5Buff = UnicodeToBIG5(pUnicodeBuff);
delete[] pBuffer;
delete[] pUnicodeBuff;
return pBIG5Buff;
}
三、API 函数:MultiByteToWideChar 参数说明
第一个参数为代码页, 用 GetLocaleInfo 函数获取当前系统的代码页,936: 简体中文, 950: 繁体中文
第二个参数为选项,一般用 0 就可以了
第三个参数为 ANSI 字符串的地址, 这个字符串是第一个参数指定的语言的 ANSI 字符串 (AnsiString)
第四个参数为 ANSI 字符串的长度,如果用 -1, 就表示是用 0 作为结束符的字符串
第五个参数为转化生成的 unicode 字符串 (WideString) 的地址, 如果为 NULL, 就是代表计算生成的字符串的长度
第六个参数为转化生成的 unicode 字符串缓存的容量,也就是有多少个 UNICODE 字符。

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。