MySQL 的 utf8 表在写入包含 4 字节 UTF-8 字符的字符串时出现编码异常

分析:http://www.myexception.cn/mysql/639943.html

解决方法:http://blog.sina.com.cn/s/blog_3f78232201011o26.html

/**
 * Replaces every character whose UTF-8 encoding is four or more bytes long with
 * U+FFFD (REPLACEMENT CHARACTER), so the resulting string contains only characters
 * that encode to at most three UTF-8 bytes.
 *
 * <p>Useful before writing text to storage that cannot hold 4-byte UTF-8 sequences
 * (e.g. a MySQL column using the 3-byte {@code utf8} character set).
 */
public class Utf8Filter {

    /**
     * Charset used for BOTH encoding and decoding. The byte scan below only makes
     * sense on UTF-8 bytes, so the platform default charset must never be used.
     */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /** UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER. */
    private static final byte[] REPLACEMENT = { (byte) 0xEF, (byte) 0xBF, (byte) 0xBD };

    /**
     * Demo driver: prints the filtered form of several samples that mix the ASCII
     * letter 'A' (0x41) with the 4-byte sequence F3 B7 A2 BE in different positions.
     *
     * @param args unused
     * @throws Exception kept for signature compatibility; not thrown by this code
     */
    public static void main(String[] args) throws Exception {
        byte[][] samples = {
            { (byte) 0x41 },
            { (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe },
            { (byte) 0x41, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe },
            { (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, (byte) 0x41 },
            { (byte) 0x41, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, (byte) 0x41 },
            { (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2,
                (byte) 0xbe, (byte) 0x41 },
            { (byte) 0x41, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, (byte) 0xf3, (byte) 0xb7,
                (byte) 0xa2, (byte) 0xbe },
            { (byte) 0x41, (byte) 0xf3, (byte) 0xb7, (byte) 0xa2, (byte) 0xbe, (byte) 0xf3, (byte) 0xb7,
                (byte) 0xa2, (byte) 0xbe, (byte) 0x41 },
        };

        for (byte[] sample : samples) {
            System.out.println(filterMt4BytesUtf8(new String(sample, UTF_8)));
        }
    }

    /**
     * Returns {@code input} with each UTF-8 sequence of four or more bytes replaced
     * by a single U+FFFD.
     *
     * @param input text to filter; may be {@code null}
     * @return {@code input} itself when it is {@code null} or empty, otherwise the
     *         filtered text
     */
    private static String filterMt4BytesUtf8(String input) {
        if (input == null || input.isEmpty()) {
            return input;
        }

        // Encode explicitly as UTF-8: the lead/continuation-byte tests below are
        // meaningless on bytes produced by any other (platform default) charset.
        byte[] bytes = input.getBytes(UTF_8);

        // A replacement (3 bytes) is never longer than what it replaces (>= 4 bytes),
        // so the output cannot exceed the input length.
        ByteArrayOutputStream filtered = new ByteArrayOutputStream(bytes.length);

        int i = 0;
        while (i < bytes.length) {
            byte current = bytes[i];
            if (isLongSequenceLead(current)) {
                // Skip the lead byte and all continuation bytes (10xxxxxx) that follow it.
                i++;
                while (i < bytes.length && (bytes[i] & 0xC0) == 0x80) {
                    i++;
                }
                filtered.write(REPLACEMENT, 0, REPLACEMENT.length);
            } else {
                // Part of a 1-, 2- or 3-byte sequence: copy through unchanged.
                filtered.write(current);
                i++;
            }
        }

        return new String(filtered.toByteArray(), UTF_8);
    }

    /**
     * Tells whether {@code b} is the lead byte of a 4-, 5- or 6-byte UTF-8 style
     * sequence (bit patterns 11110xxx, 111110xx, 1111110x).
     */
    private static boolean isLongSequenceLead(byte b) {
        return (b & 0xF8) == 0xF0 || (b & 0xFC) == 0xF8 || (b & 0xFE) == 0xFC;
    }

}
原文地址:https://www.cnblogs.com/sunxucool/p/3290728.html