Windows下编码转换相关(UTF-8 & UTF-16)

一、ANSI转换为UTF-16

#include <windows.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cwchar>

// Converts a NUL-terminated string in the system ANSI code page (CP_ACP)
// into a newly allocated, NUL-terminated UTF-16 string.
//
// str:     NUL-terminated input. A null pointer yields a null result.
// Returns: a buffer allocated with new[]; release it with
//          AnsiToUnicodeEnd(). If the conversion fails the returned buffer
//          holds an empty string.
wchar_t* AnsiToUnicodeBegin(const char* str)
{
  if (str == NULL)
  {
    return NULL;
  }

  // Size query. With cbMultiByte == -1 the reported count already includes
  // the terminating NUL; a return value of 0 signals failure.
  const int wLen = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0);

  // Value-initialised, so the result is a valid empty string even when the
  // size query failed and nothing is written below.
  wchar_t* wBuf = new wchar_t[wLen + 1]();
  if (wLen > 0 && MultiByteToWideChar(CP_ACP, 0, str, -1, wBuf, wLen) == 0)
  {
    wBuf[0] = L'\0';  // discard whatever a failed conversion left behind
  }
  return wBuf;
}

// Frees a wide-character buffer that was allocated with new[], such as the
// one returned by AnsiToUnicodeBegin(). A null pointer is ignored.
// The pointer is taken by value, so the caller's own variable is left
// untouched by this call.
void AnsiToUnicodeEnd(wchar_t* wstr_chaged)
{
  if (!wstr_chaged)
  {
    return;
  }
  delete[] wstr_chaged;
}

二、UTF-8转换为UTF-16

// Converts a NUL-terminated UTF-8 string into a newly allocated,
// NUL-terminated UTF-16 string.
//
// str:     NUL-terminated UTF-8 input. A null pointer yields a null result.
// Returns: a buffer allocated with new[]; release it with
//          UTF8ToUnicodeEnd(). If the conversion fails the returned buffer
//          holds an empty string.
wchar_t* UTF8ToUnicodeBegin(const char* str)
{
  if (str == NULL)
  {
    return NULL;
  }

  // Size query. With cbMultiByte == -1 the reported count already includes
  // the terminating NUL; a return value of 0 signals failure.
  const int wLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);

  // Value-initialised, so the result is a valid empty string even when the
  // size query failed and nothing is written below.
  wchar_t* wBuf = new wchar_t[wLen + 1]();
  if (wLen > 0 && MultiByteToWideChar(CP_UTF8, 0, str, -1, wBuf, wLen) == 0)
  {
    wBuf[0] = L'\0';  // discard whatever a failed conversion left behind
  }
  return wBuf;
}

// Counterpart of UTF8ToUnicodeBegin(): destroys a wide-character array that
// was created with new[]. Passing a null pointer does nothing.
// Only the local copy of the pointer is visible here; the caller's variable
// keeps its (now stale) value.
void UTF8ToUnicodeEnd(wchar_t* wstr_chaged)
{
  // delete[] on a null pointer is defined to be a no-op, so no guard is needed.
  delete[] wstr_chaged;
}

三、UTF-16转换为UTF-8

// Converts a NUL-terminated UTF-16 string into a newly allocated,
// NUL-terminated UTF-8 string.
//
// wstr:    NUL-terminated UTF-16 input. A null pointer yields a null result.
// Returns: a buffer allocated with new[]; release it with
//          UnicodeToUTF8End(). If the conversion fails the returned buffer
//          holds an empty string.
char* UnicodeToUTF8Begin(const wchar_t* wstr)
{
  if (wstr == NULL)
  {
    return NULL;
  }

  // Size query. With cchWideChar == -1 the reported byte count already
  // includes the terminating NUL; a return value of 0 signals failure.
  const int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);

  // Value-initialised, so the result is a valid empty string even when the
  // size query failed and nothing is written below.
  char* buf = new char[len + 1]();
  if (len > 0 && WideCharToMultiByte(CP_UTF8, 0, wstr, -1, buf, len, NULL, NULL) == 0)
  {
    buf[0] = '\0';  // discard whatever a failed conversion left behind
  }
  return buf;
}

// Releases a char array created with new[], e.g. the buffer handed out by
// UnicodeToUTF8Begin(). Null is accepted and ignored.
// The argument is a by-value copy, so the caller's pointer is not reset.
void UnicodeToUTF8End(const char* str_chaged)
{
  if (str_chaged != NULL)
  {
    delete[] str_chaged;
  }
}

四、读写UTF-8格式文件,转换为UTF-16处理,最后转回UTF-8写入

// Demo: reads the first line (at most 255 bytes) of a UTF-8 file, converts it
// to UTF-16 and back to UTF-8, and writes the result to a second file.
// A leading UTF-8 byte-order mark, if present, is copied through unchanged.
// Returns 0 on success and 1 if either file cannot be opened.
int main()
{
  // Backslashes must be escaped: "\t" in a literal is a TAB character and
  // "\o" is not a valid escape sequence at all.
  const char* const inputPath = "c:\\test.txt";
  const char* const outputPath = "c:\\outputUTF-8.txt";

  FILE* fpr = fopen(inputPath, "rb");
  if (fpr == NULL)
  {
    perror(inputPath);
    return 1;
  }

  // Binary mode, matching the input, so the CRT does not rewrite line
  // endings in the already-encoded UTF-8 bytes.
  FILE* fpw = fopen(outputPath, "wb");
  if (fpw == NULL)
  {
    perror(outputPath);
    fclose(fpr);
    return 1;
  }

  // Only treat the first three bytes as a header if they really are the
  // UTF-8 BOM; otherwise rewind so they are converted as ordinary text.
  static const unsigned char utf8Bom[3] = {0xEF, 0xBB, 0xBF};
  char headFlag[3] = {0};
  size_t headLen = fread(headFlag, 1, sizeof(headFlag), fpr);
  if (headLen != sizeof(headFlag) || memcmp(headFlag, utf8Bom, sizeof(utf8Bom)) != 0)
  {
    headLen = 0;
    rewind(fpr);
  }

  char lineBuf[256] = {0};
  if (fgets(lineBuf, sizeof(lineBuf), fpr) == NULL)
  {
    lineBuf[0] = '\0';  // empty file or read error: convert an empty line
  }

  wchar_t* wstr = UTF8ToUnicodeBegin(lineBuf);
  char* str = UnicodeToUTF8Begin(wstr);

  fwrite(headFlag, 1, headLen, fpw);
  fwrite(str, 1, strlen(str), fpw);

  UnicodeToUTF8End(str);
  UTF8ToUnicodeEnd(wstr);

  fclose(fpr);
  fclose(fpw);

  system("pause");
  return 0;
}

下面是我用C++写的一个转换类。功能是读取UTF8文件到wstring中处理,然后再转换为UTF8编码的字符串,最后写回文件中。

 1 #ifndef CHARACTERCONVERT_H_
 2 #define CHARACTERCONVERT_H_
 3 
 4 #include <string>
 5 namespace MyLIB
 6 {
 7 
 8     class CharacterConvert
 9     {
10     public:
11         static void ConvertUTF8ToUnicode(const std::string& strUtf8,std::wstring& strUtf16);
12         static void ConvertUnicodeToUTF8(const std::wstring& strUtf16,std::string& strUtf8);
13     private:
14         CharacterConvert(void);
15         ~CharacterConvert(void);
16     };
17 
18 }
19 #endif
 1 #include "StdAfx.h"
 2 #include "CharacterConvert.h"
 3 #include <Windows.h>
 4 
 5 
 6 using namespace MyLIB;
 7 
 8 void CharacterConvert::ConvertUTF8ToUnicode(const std::string& strUtf8,std::wstring& strUtf16)
 9 {
10     int wLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0);
11     wchar_t* wBuf = new wchar_t[wLen+1];
12     if(wBuf==NULL)
13     {
14         return;
15     }
16     MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, wBuf, wLen);
17     strUtf16.assign(wBuf);
18     if(wBuf!=NULL)
19     {
20         delete[] wBuf;
21         wBuf = NULL;
22     }
23 }
24 
25 void CharacterConvert::ConvertUnicodeToUTF8(const std::wstring& strUtf16,std::string& strUtf8)
26 {
27     int len = WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,NULL,0,0,0);
28     char *buf = new char[len+1];
29     if(buf==NULL)
30     {
31         return;
32     }
33     WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,buf,len,NULL,NULL);
34     strUtf8.assign(buf);
35     if(buf!=NULL)
36     {
37         delete[] buf;
38         buf=NULL;
39     }
40 }
 1 // STLTest.cpp : 
2 // 3 4 #include "stdafx.h" 5 #include <iostream> 6 #include <fstream> 7 #include <string> 8 #include <algorithm> 9 #include "CharacterConvert.h" 10 using namespace std; 11 12 13 14 int _tmain(int argc, _TCHAR* argv[]) 15 { 16 string input; 17 wstring output; 18 string utf8; 19 ifstream fin("testUTF8.txt",ios_base::in|ios_base::binary); 20 if(!fin.is_open()) 21 { 22 return -1; 23 } 24 ofstream fout("UTF8Output.txt",ios_base::out|ios_base::binary); 25 if(!fout.is_open()) 26 { 27 return -1; 28 } 29 30 while(getline(fin,input)) 31 { 32 MyLIB::CharacterConvert::ConvertUTF8ToUnicode(input,output); 33 MyLIB::CharacterConvert::ConvertUnicodeToUTF8(output,utf8); 34 fout << utf8 << endl; 35 } 36 37 return 0; 38 }
原文地址:https://www.cnblogs.com/elitiwin/p/3965308.html