UTF8 转 Unicode

int UTF2Uni(const char* src, AECHAR *&t)
{
 if (src == NULL)
 {
  return -1;
 }

 int size_s = STRLEN(src);
 int size_d = size_s*2 +1;          //?
 
 AECHAR *des = new AECHAR[size_d];
 MEMSET(des, 0, size_d * sizeof(AECHAR));
 
 int s = 0, d = 0;
 bool toomuchbyte = true; //set true to skip error prefix.
 
 while (s < size_s && d < size_d)
 {
  unsigned char c = src[s];
  if ((c & 0x80) == 0)
  {
   des[d++] += src[s++];
  }
  else if((c & 0xE0) == 0xC0)  ///< 110x-xxxx 10xx-xxxx
  {
   AECHAR &wideChar = des[d++];
   wideChar  = (src[s + 0] & 0x3F) << 6;
   wideChar |= (src[s + 1] & 0x3F);
  
   s += 2;
  }
  else if((c & 0xF0) == 0xE0)  ///< 1110-xxxx 10xx-xxxx 10xx-xxxx
  {
   AECHAR &wideChar = des[d++];
  
   wideChar  = (src[s + 0] & 0x1F) << 12;
   wideChar |= (src[s + 1] & 0x3F) << 6;
   wideChar |= (src[s + 2] & 0x3F);
  
   s += 3;
  }
  else if((c & 0xF8) == 0xF0)  ///< 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
  {
   AECHAR &wideChar = des[d++];
  
   wideChar  = (src[s + 0] & 0x0F) << 18;
   wideChar  = (src[s + 1] & 0x3F) << 12;
   wideChar |= (src[s + 2] & 0x3F) << 6;
   wideChar |= (src[s + 3] & 0x3F);
  
   s += 4;
  }
  else
  {
   AECHAR &wideChar = des[d++]; ///< 1111-10xx 10xx-xxxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
  
   wideChar  = (src[s + 0] & 0x07) << 24;
   wideChar  = (src[s + 1] & 0x3F) << 18;
   wideChar  = (src[s + 2] & 0x3F) << 12;
   wideChar |= (src[s + 3] & 0x3F) << 6;
   wideChar |= (src[s + 4] & 0x3F);

   s += 5;
  }
 }
 
 t = des;
// delete[] des;   //care here
 des = NULL;
 
 return 0;
}

原文地址:https://www.cnblogs.com/secbook/p/2655474.html