VC中實現GB2312、BIG5、Unicode編碼轉換的方法

2020-01-26 15:27:46

字體：大中小

來源：轉載

供稿：網友

本文主要以實例形式討論了VC編譯環境下，實現字符串和文件編碼方式轉換的方法，在linux下請使用Strconv來實現。具體方法如下：

一、文件編碼格式轉換

//GB2312 編碼文件轉換成 Unicode：if((file_handle = fopen(filenam,"rb")) != NULL){    //從GB2312源文件以二進制的方式讀取buffer    numread = fread(str_buf_pool,sizeof(char),POOL_BUFF_SIZE,file_handle);    fclose(file_handle);   //GB2312文件buffer轉換成UNICODE   nLen  =MultiByteToWideChar(CP_ACP,0,str_buf_pool,-1,NULL,0);    MultiByteToWideChar(CP_ACP,0,str_buf_pool,-1,(LPWSTR)str_unicode_buf_pool,nLen);   //組裝UNICODE Little Endian編碼文件文件頭標示符"0xFF 0xFE"    //備注：UNICODE Big Endian編碼文件文件頭標示符"0xFF 0xFE"    //Little Endian與Big Endian編碼差異此處不詳述   unicode_little_file_header[0]=0xFF;   unicode_little_file_header[1]=0xFE;   //存儲目標文件   if((file_handle=fopen(filenewname,"wb+")) != NULL)   {   fwrite(unicode_little_file_header,sizeof(char),2,file_handle);   numwrite = fwrite(str_unicode_buf_pool,sizeof(LPWSTR),nLen,file_handle);   fclose(file_handle);   }}

二、字符串編碼格式轉換

//GB2312 轉換成 Unicode：wchar_t* GB2312ToUnicode(const char* szGBString){    UINT nCodePage = 936; //GB2312    int nLength=MultiByteToWideChar(nCodePage,0,szGBString,-1,NULL,0);    wchar_t* pBuffer = new wchar_t[nLength+1];    MultiByteToWideChar(nCodePage,0,szGBString,-1,pBuffer,nLength);    pBuffer[nLength]=0;    return pBuffer;}//BIG5 轉換成 Unicode：wchar_t* BIG5ToUnicode(const char* szBIG5String){    UINT nCodePage = 950; //BIG5    int nLength=MultiByteToWideChar(nCodePage,0,szBIG5String,-1,NULL,0);    wchar_t* pBuffer = new wchar_t[nLength+1];    MultiByteToWideChar(nCodePage,0,szBIG5String,-1,pBuffer,nLength);    pBuffer[nLength]=0;    return pBuffer;}//Unicode 轉換成 GB2312：char* UnicodeToGB2312(const wchar_t* szUnicodeString){    UINT nCodePage = 936; //GB2312    int nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL);    char* pBuffer=new char[nLength+1];    WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL);    pBuffer[nLength]=0;    return pBuffer;}//Unicode 轉換成 BIG5：char* UnicodeToBIG5(const wchar_t* szUnicodeString){    UINT nCodePage = 950; //BIG5    int nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL);    char* pBuffer=new char[nLength+1];    WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL);    pBuffer[nLength]=0;    return pBuffer;}//繁體中文BIG5 轉換成 簡體中文 GB2312char* BIG5ToGB2312(const char* szBIG5String){    LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);    wchar_t* szUnicodeBuff = BIG5ToUnicode(szBIG5String);    char* szGB2312Buff = UnicodeToGB2312(szUnicodeBuff);    int nLength = LCMapString(lcid,LCMAP_SIMPLIFIED_CHINESE, szGB2312Buff,-1,NULL,0);    char* pBuffer = new char[nLength + 1];    LCMapString(0x0804,LCMAP_SIMPLIFIED_CHINESE,szGB2312Buff,-1,pBuffer,nLength);    pBuffer[nLength] = 0;        delete[] szUnicodeBuff;    delete[] szGB2312Buff;    return pBuffer;}//簡體中文 GB2312 轉換成 繁體中文BIG5char* 
GB2312ToBIG5(const char* szGBString){    LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);    int nLength = LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,NULL,0);    char* pBuffer=new char[nLength+1];    LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,pBuffer,nLength);    pBuffer[nLength]=0;    wchar_t* pUnicodeBuff = GB2312ToUnicode(pBuffer);    char* pBIG5Buff = UnicodeToBIG5(pUnicodeBuff);    delete[] pBuffer;    delete[] pUnicodeBuff;    return pBIG5Buff;}

三、API 函數：MultiByteToWideChar參數說明

第一個參數為代碼頁, 用 GetLocaleInfo 函數獲取當前系統的代碼頁，936: 簡體中文, 950: 繁體中文
第二個參數為選項，一般用 0 就可以了
第三個參數為 ANSI 字符串的地址, 這個字符串是第一個參數指定的語言的 ANSI 字符串 (AnsiString)
第四個參數為 ANSI 字符串的長度，如果用 -1, 就表示是用 0 作為結束符的字符串
第五個參數為接收轉化結果的 unicode 字符串 (WideString) 緩存的地址；如果只是要計算轉化後字符串的長度，可以傳入 NULL（此時第六個參數必須為 0）
第六個參數為轉化生成的 unicode 字符串緩存的容量，也就是能容納多少個 UNICODE 字符（以 wchar_t 為單位，不是字節數）；如果為 0，函數不會寫入緩存，只返回所需的字符數（第四個參數為 -1 時，這個數目包含結尾的 0）。

上一篇：C和MFC巧妙獲取外網IP的兩種實現方法

下一篇：VC小技巧匯總之5則實用小技巧