def is_chinese_or_space(str):
    """Return True when every character of the input is Chinese or a space.

    >>> is_chinese_or_space(u"中国 人")
    True
    >>> is_chinese_or_space(u"中国 人1")
    False
    >>> is_chinese_or_space(u"华为huawei")
    False
    >>> is_chinese_or_space(u"游泳裤xxxl")
    False

    Args:
        str: Text to inspect. The parameter keeps its historical name (even
            though it shadows the builtin) so keyword callers keep working.
            Byte strings are decoded as UTF-8, dropping undecodable bytes.

    Returns:
        True if ``_is_chinese_or_space`` accepts every character (this
        includes empty input, where nothing can fail), otherwise False.
    """
    text = str
    # A byte string has to be *decoded* to get characters. The previous
    # ``encode`` call went the wrong way: on Python 2 it forced an implicit
    # ASCII decode first (which fails on non-ASCII bytes under the default
    # encoding), and on Python 3 it turned text into bytes that iterate as
    # integers.
    if isinstance(text, bytes):
        text = text.decode("utf-8", "ignore")
    return all(_is_chinese_or_space(char) for char in text)
def is_english_or_space(str):
    """Return True when every character of the input is a letter or a space.

    >>> is_english_or_space(u"abc def1")
    False
    >>> is_english_or_space(u"abc def")
    True
    >>> is_english_or_space(u"游泳裤xxxl")
    False
    >>> is_english_or_space(u"茶具")
    False

    Args:
        str: Text to inspect. The parameter keeps its historical name (even
            though it shadows the builtin) so keyword callers keep working.
            Byte strings are decoded as UTF-8, dropping undecodable bytes.

    Returns:
        True if ``_is_english_or_space`` accepts every character (this
        includes empty input, where nothing can fail), otherwise False.
    """
    text = str
    # A byte string has to be *decoded* to get characters. The previous
    # ``encode`` call went the wrong way: on Python 2 it forced an implicit
    # ASCII decode first (which fails on non-ASCII bytes under the default
    # encoding), and on Python 3 it turned text into bytes that iterate as
    # integers.
    if isinstance(text, bytes):
        text = text.decode("utf-8", "ignore")
    return all(_is_english_or_space(char) for char in text)
def _is_chinese_or_space(uchar):
    """Tell whether a single character is Chinese or an ASCII space.

    >>> _is_chinese_or_space(u"人")
    True
    >>> _is_chinese_or_space(u"1")
    False
    >>> _is_chinese_or_space(u" ")
    True

    The Chinese check is delegated to ``is_chinese``; the space comparison
    is only evaluated when that check fails.
    """
    if is_chinese(uchar):
        return True
    # Normalise the comparison result to a plain bool.
    return bool(uchar == u" ")
def _is_english_or_space(uchar):
    """Tell whether a single character is a non-Chinese letter or a space.

    >>> _is_english_or_space(u"1")
    False
    >>> _is_english_or_space(u"a")
    True
    >>> _is_english_or_space(u" ")
    True
    >>> _is_english_or_space(u"中")
    False

    Anything ``is_chinese`` accepts is rejected up front; what remains must
    satisfy ``isalpha()`` or be exactly one ASCII space.
    """
    # Order matters: the Chinese test runs first, then isalpha(), then the
    # space comparison, each one only if the previous did not decide.
    not_chinese = not is_chinese(uchar)
    return bool(not_chinese and (uchar.isalpha() or uchar == u" "))
def is_chinese(original):
    """Determine whether the input consists of Chinese characters.

    >>> is_chinese(u"人")
    True
    >>> is_chinese("人")
    True
    >>> is_chinese("1")
    False
    >>> is_chinese(" ")
    False
    >>> is_chinese(",")
    False
    >>> is_chinese("你好")
    True
    >>> is_chinese("你U")
    False
    >>> is_chinese("U你好")
    False

    Each distinct element of ``original`` is converted to the other string
    kind (``str2unicode`` for values of the same type as ``""``,
    ``unicode2str`` otherwise) and counts as Chinese when the converted
    value has a different length than the element itself.

    NOTE(review): the earlier examples also expected True for a space and
    for "A"; presumably those were full-width forms. Confirm against the
    conversion helpers before relying on that.

    Returns:
        False if any distinct element fails the length test, otherwise True
        (so an input with no elements yields True).
    """
    def _length_changes(char):
        # Turn what is not unicode into unicode, and unicode into
        # non-unicode, then compare lengths before and after.
        # (A direct code-point range comparison was previously left here
        # commented out; the length comparison is what actually runs.)
        if type(char) == type(""):
            converted = str2unicode(char)
        else:
            converted = unicode2str(char)
        return len(converted) != len(char)

    # Duplicates are collapsed first; every distinct element is evaluated
    # before the verdict is taken.
    verdicts = [_length_changes(char) for char in set(original)]
    return False not in verdicts
def is_number(original):
    """Determine whether the input consists solely of the digits 0-9.

    >>> is_number(1)
    True
    >>> is_number("2")
    True
    >>> is_number("323")
    True
    >>> is_number("95")
    True
    >>> is_number("1a")
    False
    >>> is_number("")
    False

    Args:
        original: Value to inspect. Unicode text is used as is; anything
            else is first rendered with ``str()`` and then passed through
            ``str2unicode``.

    Returns:
        True if the resulting text is non-empty and every character lies
        between "0" and "9"; otherwise False. Signs and decimal points are
        not digits, so values such as -1 or 1.5 yield False.
    """
    # str() on unicode text is at best a no-op and on Python 2 raises
    # UnicodeEncodeError for non-ASCII characters, so only convert values
    # that are not already unicode text.
    if not isinstance(original, type(u"")):
        original = str(original)
    text = str2unicode(original)
    if not text:
        return False
    # Check character by character. Comparing the whole string against
    # "0".."9" is lexicographic: it rejected "95" and accepted "1a".
    return all(u"0" <= char <= u"9" for char in text)
def is_alphabet(original):
    """Determine whether the input consists of English letters only.

    >>> is_alphabet(u"t")
    True
    >>> is_alphabet("t")
    True
    >>> is_alphabet("ab")
    True
    >>> is_alphabet(u"ab一")
    False
    >>> is_alphabet(u"_1")
    False

    The input is passed through ``str2unicode`` and each distinct character
    is looked up among a-z and A-Z.

    Returns:
        False if any distinct character is outside a-z / A-Z, otherwise
        True (so a conversion result with no characters yields True).
    """
    lowercase = [chr(code) for code in range(ord('a'), ord('z') + 1)]
    uppercase = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    letters = lowercase + uppercase

    # Every distinct character is checked before the verdict is taken.
    verdicts = [uchar in letters for uchar in set(str2unicode(original))]
    return False not in verdicts
def is_other(uchar):
    """Determine whether the input is neither Chinese, a number nor English letters.

    The checks run in the order ``is_chinese``, ``is_number``,
    ``is_alphabet``; the first one that accepts the input ends the
    evaluation with False. True is returned only when all three reject it.
    """
    if is_chinese(uchar):
        return False
    if is_number(uchar):
        return False
    if is_alphabet(uchar):
        return False
    return True