Pandas入门之八：字符串与文本数据

已信任
Jupyter 服务器: 本地
Python 3: Not Started
[13]



import pandas as pd
import numpy as np
[15]



# Sample Series of names padded with leading/trailing spaces; the string
# method examples in the following cells operate on it.
s = pd.Series(
    data=['   Tom', '   xiaoming', 'john   '],
)
s
0            Tom
1       xiaoming
2        john   
dtype: object
[16]



# Strip leading and trailing whitespace from every string in the Series
# (whitespace inside a string is left untouched).
s.str.strip()
0         Tom
1    xiaoming
2        john
dtype: object
[17]



# Split each string on the character 'o'; every element becomes a list of
# the pieces, and the separator itself is dropped.
s.str.split('o')
0         [   T, m]
1    [   xia, ming]
2        [j, hn   ]
dtype: object
[18]



# Concatenate all elements of the Series into one single string,
# joined by the separator '<=>'.
s.str.cat(sep='<=>')
'   Tom<=>   xiaoming<=>john   '
[19]



# One-hot encode the strings: the result is a DataFrame with one 0/1
# indicator column per distinct value in the Series.
s.str.get_dummies()
Tom    xiaoming    john
0    1    0    0
1    0    1    0
2    0    0    1



[20]




# Test, element by element, whether each string contains the pattern 'm'
# (the pattern is interpreted as a regular expression by default).
# Returns a boolean Series.
s.str.contains('m')
0     True
1     True
2    False
dtype: bool
[21]



# Replace every literal 'o' with 'aaa' in each string.
# regex=False is passed explicitly: the default of `regex` changed from True
# to False in pandas 2.0, and pandas 1.2-1.5 emit a FutureWarning for
# single-character patterns. Being explicit keeps the result identical and
# warning-free across versions.
s.str.replace('o', 'aaa', regex=False)
0            Taaam
1       xiaaaaming
2        jaaahn   
dtype: object
[22]



# Count how many times the pattern 'i' occurs in each string
# (the pattern is treated as a regular expression).
s.str.count('i')
0    0
1    2
2    0
dtype: int64
[33]



# Rebind `s` to a new sample: a purely numeric string, a string mixing
# digits, spaces and letters, and a purely alphabetic string with padding.
s = pd.Series(
    data=['348', '  697 xiaoming', 'john   '],
)
s
0               348
1      697 xiaoming
2           john   
dtype: object
[34]



# Check whether each string consists solely of numeric characters.
# Strings that also contain spaces or letters yield False.
s.str.isnumeric()
0     True
1    False
2    False
dtype: bool
[-]



[-]