Python 2.7 字符处理小结

Unicode 是字符集（为每个字符分配唯一的码点）。

UTF-8、GBK 是编码方式，用于将字符集中的字符编码为机器可识别的字节序列。

#encoding: utf-8


s = "中文"  #unicode的utf-8编码，xE4xB8xADxE6x96x87
us = u"中文"  #unicode字符集 u4E2D u6587

print repr(s)
print repr(us)

print s.decode('utf-8')   #utf-8解码为无编码的unicode u4E2D u6587
print us.encode('utf-8')  #uicode编码为utf-8

print repr(us.encode('gbk')) #uicode编码为gbk： 'xd6xd0xcexc4'

str='u4E2Du6587' 
print str str2= str.decode('unicode-escape') #字符串转换为unicode编码

print str2 

print repr(str)