GB2312UTF8UCS2汉字编码对应表
最近老是遇到中文编码转换的问题,但是又不是所有的环境都提供编码转换的工具。所以不如自己把整个汉字编码库搞下来,再逐个查询。
对各个编码的解释看⼤佬的⽂章:
可以相互佐证。至于百度到的UTF-8计算工具,它们只是把Unicode的编码放出来,Unicode还分大端和小端呢,根本就不是UTF-8的编码,中文的UTF-8编码至少是3个字节的。
常⽤字⽤GB2312就⾜够,内存不⼤,甚⾄可以放到单⽚机上。
刚好用了一下Qt,Qt有转码用的库。GB2312的编码是按顺序排列的,从第1区到第87区,其中10~15区没有编码,所以可以直接用for循环生成再转换就完事了。顺便做个工具,后面方便使用。
使⽤界⾯:
编码转换
按区⽣成
生成的C库使用方法:比如已知编码x,编码方式为GB2312,先比对x在GB2312库中的位置,找到位置下标,这个下标也是x在其它编码库中的位置下标。
/*
 * Convert a single character code from one encoding to another by table lookup.
 *
 * The four tables are parallel: the index at which `code` is found in the
 * source-encoding table is the index of the converted value in the
 * target-encoding table.
 *
 * code:     the code to convert, expressed in the source encoding
 * intype:   source encoding, one of
 *             GB2312, UTF8, UCS2_BIG, UCS2_LITTLE
 * wanttype: target encoding, one of
 *             GB2312, UTF8, UCS2_BIG, UCS2_LITTLE
 *
 * Returns the converted code. If the conversion fails (unrecognised encoding
 * type, code rejected by the byte guard, or code not present in the source
 * table) the original `code` is returned unchanged.
 */
uint32_t textCode::textCodeConvet(uint32_t code, textCode::CodeType intype, textCode::CodeType wanttype)
{
    // All tables are stored as 32-bit values regardless of the encoding's
    // natural width (the original author chose this for simplicity, at the
    // cost of storage).
    const uint32_t *intp;
    const uint32_t *wantp;

    switch (intype) { // select the source table
    case textCode::GB2312:
        intp = _GB2312_1_87;
        break;
    case textCode::UTF8:
        intp = _UTF8_1_87;
        break;
    case textCode::UCS2_BIG:
        intp = _UCS2_Big_1_87;
        break;
    case textCode::UCS2_LITTLE:
        intp = _UCS2_Little_1_87;
        break;
    default:
        return code;
    }

    switch (wanttype) { // select the target table
    case textCode::GB2312:
        wantp = _GB2312_1_87;
        break;
    case textCode::UTF8:
        wantp = _UTF8_1_87;
        break;
    case textCode::UCS2_BIG:
        wantp = _UCS2_Big_1_87;
        break;
    case textCode::UCS2_LITTLE:
        wantp = _UCS2_Little_1_87;
        break;
    default:
        return code;
    }

    // Only search when both of the two lowest bytes are non-zero.
    // NOTE(review): this guard also rejects UCS-2 values whose high or low
    // byte is zero (e.g. 0x4E00); confirm whether that is intended.
    if ((code & 0x00ff) && (code & 0xff00))
    {
        // TEXTCODE_SIZE is the number of table entries (7614 per the original comment).
        for (uint32_t i = 0; i < TEXTCODE_SIZE; ++i)
        {
            if (intp[i] == code) // found the position of the code in the source table
            {
                return wantp[i]; // same index in the target table
            }
        }
    }
    return code;
}
#include <stdint.h>
// GB2312原始编码从0XA1A0到0XFEFF。从01到94个区,其中10~15,(6个区)没有编码,88~94区(7区)没有编码。
//有效编码区只有81个区,分别是01~09区、16~87区
//每个区的编码从0xXXA1到0xXXFE,A0和FF都没有编码,有效编码94个
//所以共有编码个数94*81=7614。
// GB2312的编码为两个字节,转UTF编码后有3个字节,转UCS编码有2个字节。
// Unicode编码会用UCS-BIG或者UCS-LITTLE(大端或者小端),它们的区别就是高低字节位置不同。
//⽐如中⽂编码
//  “哈”:
//  GB2312 0xB9FE
//  UTF-8  0xE59388
//  UCS-BIG  0x54C8
//  UCS-LITTLE 0xC854
// GB2312 codes for zone 16: lead byte 0xB0, trail bytes 0xA1..0xFE (94 entries).
// Entry i of this table pairs with entry i of the other zone-16 tables.
const uint32_t _GB2312_16_16[]={
0xB0A1, 0xB0A2, 0xB0A3, 0xB0A4, 0xB0A5, 0xB0A6, 0xB0A7, 0xB0A8, 0xB0A9, 0xB0AA, 0xB0AB, 0xB0AC, 0xB0AD, 0xB0AE, 0xB0AF,  // zone 16: 0xB0A1..0xB0AF
0xB0B0, 0xB0B1, 0xB0B2, 0xB0B3, 0xB0B4, 0xB0B5, 0xB0B6, 0xB0B7, 0xB0B8, 0xB0B9, 0xB0BA, 0xB0BB, 0xB0BC, 0xB0BD, 0xB0BE, 0xB0BF,  // zone 16: 0xB0B0..0xB0BF
0xB0C0, 0xB0C1, 0xB0C2, 0xB0C3, 0xB0C4, 0xB0C5, 0xB0C6, 0xB0C7, 0xB0C8, 0xB0C9, 0xB0CA, 0xB0CB, 0xB0CC, 0xB0CD, 0xB0CE, 0xB0CF,  // zone 16: 0xB0C0..0xB0CF
0xB0D0, 0xB0D1, 0xB0D2, 0xB0D3, 0xB0D4, 0xB0D5, 0xB0D6, 0xB0D7, 0xB0D8, 0xB0D9, 0xB0DA, 0xB0DB, 0xB0DC, 0xB0DD, 0xB0DE, 0xB0DF,  // zone 16: 0xB0D0..0xB0DF
0xB0E0, 0xB0E1, 0xB0E2, 0xB0E3, 0xB0E4, 0xB0E5, 0xB0E6, 0xB0E7, 0xB0E8, 0xB0E9, 0xB0EA, 0xB0EB, 0xB0EC, 0xB0ED, 0xB0EE, 0xB0EF,  // zone 16: 0xB0E0..0xB0EF
0xB0F0, 0xB0F1, 0xB0F2, 0xB0F3, 0xB0F4, 0xB0F5, 0xB0F6, 0xB0F7, 0xB0F8, 0xB0F9, 0xB0FA, 0xB0FB, 0xB0FC, 0xB0FD, 0xB0FE,  // zone 16: 0xB0F0..0xB0FE
};
// UTF-8 encodings (3 bytes packed into one 32-bit value, first byte most
// significant) of the GB2312 zone-16 characters, 94 entries.
// Entry i pairs with GB2312 code 0xB0A1 + i.
const uint32_t _UTF8_16_16[]={
0xE5958A, 0xE998BF, 0xE59F83, 0xE68CA8, 0xE5938E, 0xE59489, 0xE59380, 0xE79A91, 0xE7998C, 0xE894BC, 0xE79FAE, 0xE889BE, 0xE7A28D, 0xE788B1, 0xE99A98,  // zone 16: 0xB0A1..0xB0AF
0xE99E8D, 0xE6B0A8, 0xE5AE89, 0xE4BFBA, 0xE68C89, 0xE69A97, 0xE5B2B8, 0xE883BA, 0xE6A188, 0xE882AE, 0xE69882, 0xE79B8E, 0xE587B9, 0xE69596, 0xE786AC, 0xE7BFB1,  // zone 16: 0xB0B0..0xB0BF
0xE8A284, 0xE582B2, 0xE5A5A5, 0xE6878A, 0xE6BEB3, 0xE88AAD, 0xE68D8C, 0xE68992, 0xE58FAD, 0xE590A7, 0xE7AC86, 0xE585AB, 0xE796A4, 0xE5B7B4, 0xE68B94, 0xE8B78B,  // zone 16: 0xB0C0..0xB0CF
0xE99DB6, 0xE68A8A, 0xE88099, 0xE59D9D, 0xE99CB8, 0xE7BDA2, 0xE788B8, 0xE799BD, 0xE69F8F, 0xE799BE, 0xE69186, 0xE4BDB0, 0xE8B4A5, 0xE68B9C, 0xE7A897, 0xE69691,  // zone 16: 0xB0D0..0xB0DF
0xE78FAD, 0xE690AC, 0xE689B3, 0xE888AC, 0xE9A281, 0xE69DBF, 0xE78988, 0xE689AE, 0xE68B8C, 0xE4BCB4, 0xE793A3, 0xE58D8A, 0xE58A9E, 0xE7BB8A, 0xE982A6, 0xE5B8AE,  // zone 16: 0xB0E0..0xB0EF
0xE6A286, 0xE6A69C, 0xE88680, 0xE7BB91, 0xE6A392, 0xE7A385, 0xE89A8C, 0xE99591, 0xE5828D, 0xE8B0A4, 0xE88B9E, 0xE8839E, 0xE58C85, 0xE8A492, 0xE589A5,  // zone 16: 0xB0F0..0xB0FE
};
// UCS-2 big-endian values (high byte first) of the GB2312 zone-16 characters,
// 94 entries. Entry i pairs with GB2312 code 0xB0A1 + i.
// Every value is written with four hex digits so that a low byte below 0x10
// keeps its leading zero (e.g. 0x5509, not 0x559).
const uint32_t _UCS2_Big_16_16[]={
0x554A,0x963F,0x57C3,0x6328,0x54CE,0x5509,0x54C0,0x7691,0x764C,0x853C,0x77EE,0x827E,0x788D,0x7231,0x9698, // zone 16: 0xB0A1..0xB0AF
0x978D,0x6C28,0x5B89,0x4FFA,0x6309,0x6697,0x5CB8,0x80FA,0x6848,0x80AE,0x6602,0x76CE,0x51F9,0x6556,0x71AC,0x7FF1, // zone 16: 0xB0B0..0xB0BF
0x8884,0x50B2,0x5965,0x61CA,0x6FB3,0x82AD,0x634C,0x6252,0x53ED,0x5427,0x7B06,0x516B,0x75A4,0x5DF4,0x62D4,0x8DCB, // zone 16: 0xB0C0..0xB0CF
0x9776,0x628A,0x8019,0x575D,0x9738,0x7F62,0x7238,0x767D,0x67CF,0x767E,0x6446,0x4F70,0x8D25,0x62DC,0x7A17,0x6591, // zone 16: 0xB0D0..0xB0DF
0x73ED,0x642C,0x6273,0x822C,0x9881,0x677F,0x7248,0x626E,0x62CC,0x4F34,0x74E3,0x534A,0x529E,0x7ECA,0x90A6,0x5E2E, // zone 16: 0xB0E0..0xB0EF
0x6886,0x699C,0x8180,0x7ED1,0x68D2,0x78C5,0x868C,0x9551,0x508D,0x8C24,0x82DE,0x80DE,0x5305,0x8912,0x5265, // zone 16: 0xB0F0..0xB0FE
};
// UCS-2 little-endian values (low byte first, i.e. the big-endian value with
// its two bytes swapped) of the GB2312 zone-16 characters, 94 entries.
// Entry i pairs with GB2312 code 0xB0A1 + i.
const uint32_t _UCS2_Little_16_16[]={
0x4A55,0x3F96,0xC357,0x2863,0xCE54,0x0955,0xC054,0x9176,0x4C76,0x3C85,0xEE77,0x7E82,0x8D78,0x3172,0x9896, // zone 16: 0xB0A1..0xB0AF
0x8D97,0x286C,0x895B,0xFA4F,0x0963,0x9766,0xB85C,0xFA80,0x4868,0xAE80,0x0266,0xCE76,0xF951,0x5665,0xAC71,0xF17F, // zone 16: 0xB0B0..0xB0BF
0x8488,0xB250,0x6559,0xCA61,0xB36F,0xAD82,0x4C63,0x5262,0xED53,0x2754,0x067B,0x6B51,0xA475,0xF45D,0xD462,0xCB8D, // zone 16: 0xB0C0..0xB0CF
0x7697,0x8A62,0x1980,0x5D57,0x3897,0x627F,0x3872,0x7D76,0xCF67,0x7E76,0x4664,0x704F,0x258D,0xDC62,0x177A,0x9165, // zone 16: 0xB0D0..0xB0DF
0xED73,0x2C64,0x7362,0x2C82,0x8198,0x7F67,0x4872,0x6E62,0xCC62,0x344F,0xE374,0x4A53,0x9E52,0xCA7E,0xA690,0x2E5E, // zone 16: 0xB0E0..0xB0EF
0x8668,0x9C69,0x8081,0xD17E,0xD268,0xC578,0x8C86,0x5195,0x8D50,0x248C,0xDE82,0xDE80,0x0553,0x1289,0x6552, // zone 16: 0xB0F0..0xB0FE
};

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。