Matplotlib可视化

Matplotlib可视化

  • matplotlib及环境配置
  • 数据图的组成结构,与matplotlib对应的名称
  • 常见的数据绘图类型,与绘制方法

matplotlib安装配置

  • linux
    • pip install numpy
    • pip install scipy
    • pip install matplotlib
  • windows
    • anaconda

可视化图的基本结构

  • 通常使用numpy组织数据,使用matplotlib API进行数据图像绘制,一幅数据图基本包括如下

    • Data 数据区
    • Axis 坐标轴
    • Title 标题
    • Legend 图例,区分图中包含的多种曲线或不同分类的数据
  • 使用matplotlib绘制图:

    • 导入matplotlib包相关工具
    • 准备数据,numpy数据存储
    • 绘制原始曲线
    • 配置标题,坐标轴,刻度,图例
    • 添加文字说明,注释
    • 显示,保存绘图结果
#导包
%matplotlib inline#表示当前获得图片展示,在ipython展出
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
#准备数据   numpy常用来组织源数据
x = np.arange(0.,10,0.2)#从0开始步长0.2,到9.8
y1 = np.cos(x)
y2 = np.sin(x)
y3 = np.sqrt(x)
x
array([0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8, 2. , 2.2, 2.4,
       2.6, 2.8, 3. , 3.2, 3.4, 3.6, 3.8, 4. , 4.2, 4.4, 4.6, 4.8, 5. ,
       5.2, 5.4, 5.6, 5.8, 6. , 6.2, 6.4, 6.6, 6.8, 7. , 7.2, 7.4, 7.6,
       7.8, 8. , 8.2, 8.4, 8.6, 8.8, 9. , 9.2, 9.4, 9.6, 9.8])
# plt.plot draws the curves directly; its keyword arguments control the
# style, width, colour, markers, etc. of each curve:
# color     line colour: r red, g green, b blue, c cyan, m magenta, y yellow, k black, w white
# linewidth line width
# linestyle line style: solid, dashed, ...
# marker    marker drawn at every data point
# label     legend label attached to the curve
#
# The labels are mathtext, so the function names need their leading backslash
# (\cos, \sin, \sqrt); without it they render as plain italic letters.

plt.plot(x,y1,color="blue",linewidth=1.5,linestyle='-',marker=".",label=r"$y=\cos{x}$")
plt.plot(x,y2,color="red",linewidth=1.5,linestyle='-',marker="*",label=r"$y=\sin{x}$")
plt.plot(x,y3,color="green",linewidth=1.5,linestyle='-',marker="x",label=r"$y=\sqrt{x}$")
[<matplotlib.lines.Line2D at 0x24cce94f9b0>]

#Configure the axes:
#configure the frame (spines)
ax = plt.subplot(111)
#Set the right spine colour to "none" so that this frame line is hidden
ax.spines['right'].set_color("none")
#Set the top spine colour to "none" so that this frame line is hidden
ax.spines['top'].set_color("none")
#Move the bottom spine, which amounts to moving the x axis (here to data value 0.2)
ax.xaxis.set_ticks_position("bottom")
ax.spines['bottom'].set_position(("data",0.2))
#Move the left spine, which amounts to moving the y axis (here to data value 0.2)
ax.yaxis.set_ticks_position("left")
ax.spines['left'].set_position(("data",0.2))

#Configure the axes:
#configure the frame (spines)
ax = plt.subplot(111)
#Set the right spine colour to "none" so that this frame line is hidden
ax.spines['right'].set_color("none")
#Set the top spine colour to "none" so that this frame line is hidden
ax.spines['top'].set_color("none")

#Move the bottom spine, which amounts to moving the x axis (here to data value 0)
ax.xaxis.set_ticks_position("bottom")
ax.spines['bottom'].set_position(("data",0))
#Move the left spine, which amounts to moving the y axis (here to data value 0)
ax.yaxis.set_ticks_position("left")
ax.spines['left'].set_position(("data",0))

#通过如下代码,设置可读尺间隔lim,可读标签ticks
<matplotlib.axes._subplots.AxesSubplot at 0x24ccf4160f0>
#Set the visible value range of the x and y axes
plt.xlim(x.min()*1.1,x.max()*1.1)#x range: min and max of x, each scaled by 1.1
plt.ylim(-1.5,4.0)#y range
#Set the tick positions and their text labels on both axes
plt.xticks([2,4,6,8,10],[r"2",r"4",r"6",r"8",r"10"])
plt.yticks([-1.0,0.0,1.0,2.0,3.0,4.0],[r"-1.0",r"0.0",r"1.0",r"2.0",r"3.0",r"4.0",])
([<matplotlib.axis.YTick at 0x24cce967dd8>,
  <matplotlib.axis.YTick at 0x24cce993860>,
  <matplotlib.axis.YTick at 0x24cce97a518>,
  <matplotlib.axis.YTick at 0x24cceb29898>,
  <matplotlib.axis.YTick at 0x24cceb296a0>,
  <matplotlib.axis.YTick at 0x24cceb29da0>],
 <a list of 6 Text yticklabel objects>)

# Set the title and the x / y axis labels.
# The strings are mathtext, where ordinary spaces are ignored; "\ " inserts an
# explicit space so the words do not run together.
plt.title(r"$the \ function \ figure \ of \ cos(), \ sin() \ and \ sqrt()$",fontsize=19)
# labelpad: distance between the label and its axis
# fontsize: font size
plt.xlabel(r"$the \ input \ value \ of \ x$",fontsize=18,labelpad=50.8)
plt.ylabel(r"$y=f(x)$",fontsize=18,labelpad=12.5)
Text(0, 0.5, '$y=f(x)$')

# Text annotations describing the x and y variables.
# plt.text places the string at the given data coordinates; "\ " is the
# mathtext explicit space (plain spaces are dropped inside $...$).
plt.text(4,1.68,r"$x \ in [0.0, \ 10.0]$",color='k',fontsize=15)
plt.text(4,1.38,r"$y \ in [-1.0, \ 4.0]$",color='k',fontsize=15)
Text(4, 1.38, '$y \ in [-1.0, \ 4.0]$')

# Annotate a point of interest

#    arrowprops describes the arrow drawn from the text to the point
plt.scatter([8,],[np.sqrt(8),],5,color="m")  # highlight the point with a scatter marker
# \sqrt needs its backslash to be rendered as a square-root sign by mathtext.
plt.annotate(r"$2\sqrt{2}$",xy=(8,np.sqrt(8)),xytext=(8.5,2.2),fontsize=16,color="#090909",
             arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.1",color="#090909"))

Text(8.5, 2.2, '$2\sqrt{2}$')

#Set the legend
    #Either of the following two approaches adds a legend to the plot
    #1. Pass label=... to plt.plot, then call plt.legend(loc="upper right")
    #2. Skip the label argument and pass the entries directly, as below
plt.legend(['cos(x)',"sin(x)","sqrt(x)"],loc="upper right")
<matplotlib.legend.Legend at 0x24cd14cfd68>

#Switch the grid lines on
plt.grid(True)

#Display the figure (and optionally save it)
plt.show()
#To save:   savefig("xxx/xxx/png",dpi=48)
%matplotlib inline
#表示当前获得图片展示,在ipython展出
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
#定义数据部分
x = np.arange(0.,10,0.2)#从0开始步长0.2,到9.8
y1 = np.cos(x)
y2 = np.sin(x)
y3 = np.sqrt(x)
#绘制 3条函数曲线
plt.plot(x,y1,color="blue",linewidth=1.5,linestyle='-',marker=".",label=r"$y=cos{x}$")
plt.plot(x,y2,color="red",linewidth=1.5,linestyle='-',marker="*",label=r"$y=sin{x}$")
plt.plot(x,y3,color="green",linewidth=1.5,linestyle='-',marker="x",label=r"$y=sqrt{x}$")
#坐标轴上移
#设置边框
ax = plt.subplot(111)
#将右侧边框颜色设为白色,表示边框线隐藏
ax.spines['right'].set_color("none")
#将上侧边框颜色设为白色,表示边框线隐藏
ax.spines['top'].set_color("none")

#移动下边框,相当于移动x轴
ax.xaxis.set_ticks_position("bottom")
ax.spines['bottom'].set_position(("data",0))
#移动左边框,相当于移动y轴
ax.yaxis.set_ticks_position("left")
ax.spines['left'].set_position(("data",0))

#设置x,y轴的可读取值范围
plt.xlim(x.min()*1.1,x.max()*1.1)#设置x取值范围
plt.ylim(-1.5,4.0)#设置y取值范围
#设置x,y轴的可读标签值
plt.xticks([2,4,6,8,10],[r"2",r"4",r"6",r"8",r"10"])
plt.yticks([-1.0,0.0,1.0,2.0,3.0,4.0],[r"-1.0",r"0.0",r"1.0",r"2.0",r"3.0",r"4.0",])


#设置文字描述注释:对变量x,y进行描述,注释。
plt.title(r"$the  function  figure  of  cos(),  sin()  and  sqrt()$",fontsize=19)
#labelpad 标注x,y注释离x,y距离
#fontsize 字体大小
plt.xlabel(r"$the  input  value  of  x$",fontsize=18,labelpad=50.8)
plt.ylabel(r"$y=f(x)$",fontsize=18,labelpad=12.5)



#添加文字
plt.text(4,1.68,r"$x  in [0.0,  10.0]$",color='k',fontsize=15)
plt.text(4,1.38,r"$y  in [-1.0,  4.0]$",color='k',fontsize=15)

#特殊点加注释
plt.scatter([8,],[np.sqrt(8),],5,color="m")#使用三点图放大当前点
plt.annotate(r"$2sqrt{2}$",xy=(8,np.sqrt(8)),xytext=(8.5,2.2),fontsize=16,color="#090909",
             arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.1",color="#090909"))


#设置图例和位置
plt.legend(loc="upper left")


#显示网格
plt.grid(True)

#显示绘制图片
plt.show()

常用图形,介绍

如下:

  • 曲线图:matplotlib.pyplot.plot(data)
  • 直方图:matplotlib.pyplot.hist(data)
  • 散点图:matplotlib.pyplot.scatter(data)
  • 箱式图:matplotlib.pyplot.boxplot(data)
# Line plot demo: y = x**2 sampled at x = -5, -4, ..., 5 (default step 1)
x = np.arange(-5,5.1)
y = x ** 2
plt.plot(x,y)
[<matplotlib.lines.Line2D at 0x24d1592a390>]

#Histogram of 1000 normally distributed samples, split into 10 bins
x = np.random.normal(size=1000)
plt.hist(x,bins=10)
(array([ 11.,  22.,  75., 147., 222., 224., 166.,  86.,  39.,   8.]),
 array([-3.14269253, -2.52579429, -1.90889605, -1.29199781, -0.67509957,
        -0.05820134,  0.5586969 ,  1.17559514,  1.79249338,  2.40939161,
         3.02628985]),
 <a list of 10 Patch objects>)

#Scatter plot of two independent sets of 1000 normally distributed samples
x = np.random.normal(size=1000)
y = np.random.normal(size=1000)
plt.scatter(x,y)
<matplotlib.collections.PathCollection at 0x24d159e95c0>

#Box plot
plt.boxplot(x)
#The line inside the box is the median (the original note calls it the red line; NOTE(review): its colour depends on the matplotlib version/style)
#
#Top edge of the box: upper quartile Q3; bottom edge: lower quartile Q1
{'whiskers': [<matplotlib.lines.Line2D at 0x24d15a47a20>,
  <matplotlib.lines.Line2D at 0x24d15a47d68>],
 'caps': [<matplotlib.lines.Line2D at 0x24d15a47e10>,
  <matplotlib.lines.Line2D at 0x24d15a51438>],
 'boxes': [<matplotlib.lines.Line2D at 0x24d15a47630>],
 'medians': [<matplotlib.lines.Line2D at 0x24d15a51780>],
 'fliers': [<matplotlib.lines.Line2D at 0x24d15a51ac8>],
 'means': []}

箱式图科普

  • 上边缘(Q3+1.5IQR),下边缘(Q1-1.5IQR),IQR=Q3-Q1
  • 上四分位数(Q3),下四分位数(Q1)
  • 中位数
  • 异常值
  • 处理异常值时与3σ(3西格玛)准则的异同:统计边界是否受异常值影响,以及对异常值容忍度的大小
from PIL import Image
import matplotlib.pyplot as plt
# Open the PNG file with Pillow and display it on the current axes
img = Image.open("91}D3W4T$~N{Z[NGJZSLVPL.png")
plt.imshow(img)
<matplotlib.image.AxesImage at 0x24d1618eb00>

案例:自行车租赁数据分析与可视化

  • 步骤1:导入数据,做简单的数据处理
import pandas as pd
import requests
from urllib import request
import tempfile
import shutil
import zipfile
temp_dir = tempfile.mkdtemp()#建立临时目录
temp_dir
'C:\Users\XUJK~1\AppData\Local\Temp\tmp2278a01l'
# Archive holding the bike-sharing data (day.csv and related files).
data_source = "http://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip"
zipname = temp_dir + "/Bike-Sharing-Dataset.zip"  # where the archive is stored if it has to be downloaded
daily_path = "data/day.csv"  # preferred: a local copy of the daily data

try:
    try:
        daily_data = pd.read_csv(daily_path)  # read the csv file
    except FileNotFoundError:
        # No local copy: download the archive, unpack it into the temporary
        # directory and read day.csv from there. Previously the download was
        # commented out, so opening the (non-existent) archive always failed.
        request.urlretrieve(data_source,zipname)
        with zipfile.ZipFile(zipname,"r") as zip_ref:  # closed automatically
            zip_ref.extractall(temp_dir)
        daily_path = temp_dir + "/day.csv"
        daily_data = pd.read_csv(daily_path)
finally:
    # Remove the temporary directory whether or not loading succeeded.
    shutil.rmtree(temp_dir)

# Convert the date strings into datetime values
daily_data['dteday'] = pd.to_datetime(daily_data['dteday'])

# Columns that the analysis below does not use
drop_list = ['instant',"season","yr","mnth","holiday","workingday","weathersit","atemp","hum"]
daily_data.drop(drop_list,inplace=True,axis=1)  # inplace=True modifies daily_data directly
daily_data.head()  # peek at the data

dteday weekday temp windspeed casual registered cnt
0 2011-01-01 6 0.344167 0.160446 331 654 985
1 2011-01-02 0 0.363478 0.248539 131 670 801
2 2011-01-03 1 0.196364 0.248309 120 1229 1349
3 2011-01-04 2 0.200000 0.160296 108 1454 1562
4 2011-01-05 3 0.226957 0.186900 82 1518 1600
#Step 2: configure plotting parameters
from __future__ import division,print_function
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np

#Show plot output inside the notebook
%matplotlib inline
#Set some global rc parameters; adjust to taste
import matplotlib
#Figure size 14 x 7
matplotlib.rc("figure",figsize=(14,7))
#Font size 14
matplotlib.rc("font",size=14)
#Do not draw the top and right axis lines
matplotlib.rc("axes.spines",top=False,right=False)

#Do not draw the grid
matplotlib.rc("axes",grid=False)
#White axes background
matplotlib.rc('axes',facecolor='white')
#步骤3:关联分析
#散点图
    #分析变量关系

数据分析图像选择:

  • 关联分析,数值比较:散点图,曲线图
  • 分布分析:直方图,密度图
  • 涉及分类的分析:柱状图,箱式图
# Correlation analysis:
# a small reusable wrapper around a scatter plot
def scatterplot(x_data,y_data,x_label,y_label,title):
    """Draw y_data against x_data as a scatter plot on a new figure.

    x_label, y_label and title are applied to the new axes. Nothing is
    returned; the figure is left to pyplot.
    """
    _, axes = plt.subplots()
    # Marker size, colour and transparency of the points
    point_style = dict(s=10, color="#539caf", alpha=0.75)
    axes.scatter(x_data, y_data, **point_style)
    # Axis descriptions and title
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
#Draw the scatter plot
#Relationship between temperature and bike rentals:
#temp is the temperature, cnt is the number of rentals
scatterplot(x_data=daily_data['temp'],y_data=daily_data['cnt'],x_label="Normalized temperature (c)",y_label="Check outs",title="Number of Check Outs vs Temperature")

#Linear regression
import statsmodels.api as sm#least squares
from statsmodels.stats.outliers_influence import summary_table#provides the summary information

x = sm.add_constant(daily_data['temp'])#add the constant term for the linear model y = kx+b
y = daily_data['cnt']
regr = sm.OLS(y,x) #ordinary least squares model
res = regr.fit()
#Get the fitted data from the model
st,data,ss2 = summary_table(res,alpha=0.05)#alpha=0.05 (5%); st: summary table, data: the detailed values, ss2: column names

#Column 2 of `data` -- presumably the predicted values; verify against the column names in ss2
fitted_values = data[:,2]
fitted_values
array([3500.1553571 , 3628.39410788, 2518.63849718, 2542.78411874,
       2721.79773817, 2571.65792581, 2519.68772936, 2310.35926879,
       2133.27145526, 2216.28033024, 2337.52641339, 2361.67203495,
       2310.35926879, 2282.9331365 , 2764.13890512, 2753.07548226,
       2382.2980802 , 2653.46483228, 3154.88492215, 2952.29678222,
       2393.36814377, 1607.30995753, 1855.6147373 , 1861.38949871,
       2698.69470809, 2658.99654371, 2509.58056875, 2565.88050811,
       2519.68772936, 2652.50192934, 2415.5016302 , 2490.81392229,
       2941.22671865, 2456.16933823, 2617.85070456, 2764.13890512,
       3112.77618004, 3018.70388221, 2681.13003014, 2109.69693476,
       2173.2153259 , 2470.34061336, 2692.20009371, 3316.57292919,
       3970.53676843, 2981.64872041, 3328.12112388, 4108.88267983,
       4678.88138184, 3865.39440702, 3108.68550269, 3228.98860502,
       2424.72557638, 2687.1465134 , 3177.98131152, 3634.17152558,
       3090.64269362, 3495.57990791, 3919.22400227, 2985.50033221,
       3439.27996854, 2531.71405517, 2952.29678222, 3765.78375704,
       3715.0155291 , 2952.77491334, 3157.0497936 , 3179.18328003,
       3798.48261307, 3316.57292919, 3400.54470712, 3766.98572555,
       3374.31390263, 3322.34370618, 3639.94230257, 3970.53676843,
       4800.62551825, 4352.37759334, 3422.67819355, 4073.03612725,
       4147.62458196, 3518.682938  , 3107.24446862, 2968.89855722,
       2979.96198007, 2895.02729919, 2970.10052573, 3223.45689359,
       3206.85511859, 2996.56375507, 3206.85511859, 3306.46576857,
       3727.04185491, 5021.98030463, 3965.005057  , 3810.05072989,
       4119.95274339, 3444.81167997, 3489.08529353, 4048.01393198,
       5170.19431108, 4551.5988933 , 3953.93499343, 4319.17404335,
       4180.82813195, 4075.67912983, 4247.23523194, 4618.00599329,
       4573.73237973, 5165.86456817, 4263.83700693, 3450.35003211,
       4269.36871836, 5077.32398176, 5243.34173172, 5409.35948168,
       5331.88231813, 5315.28054313, 4601.40421829, 4352.37759334,
       4214.03168194, 4861.5009068 , 5309.7488317 , 3965.005057  ,
       4263.83700693, 4396.6512069 , 4667.81131827, 4723.14835469,
       4750.82019326, 4750.82019326, 4817.22729324, 4767.42196825,
       4618.00599329, 4673.3430297 , 4950.04149321, 5049.65214319,
       4944.50978178, 4867.03261823, 4739.75012969, 4778.49203182,
       5215.66989315, 5226.73995672, 5409.35948168, 5597.51071807,
       5603.0424295 , 5918.47615443, 5741.39498161, 5569.83887951,
       5647.31604306, 6084.4939044 , 6361.1923679 , 6289.25355649,
       5962.74976799, 5331.88231813, 5431.49296811, 5520.03355452,
       5719.25485447, 5912.944443  , 6366.72407933, 6582.54715429,
       6228.37816793, 6029.15686798, 5813.33379302, 5431.49296811,
       5226.73995672, 5376.15593169, 5387.21935455, 5525.57190666,
       5841.00563159, 5857.60740658, 5431.49296811, 5735.85662947,
       6084.4939044 , 6051.2903544 , 6023.62515655, 5829.93556802,
       5730.32491804, 5746.92669304, 6156.43935652, 6051.2903544 ,
       5841.00563159, 6012.55509298, 6117.69745439, 5973.81983156,
       6040.22693154, 6173.04113152, 5995.95331798, 6195.17461794,
       5924.01450657, 6084.4939044 , 6178.57284294, 6278.18349292,
       6488.47485645, 6173.04113152, 5735.85662947, 5619.6442045 ,
       5774.5985316 , 5990.42160656, 6173.04113152, 6372.26243147,
       6316.91875434, 6626.82076785, 6848.17555423, 6853.71390637,
       6726.43141782, 6150.90100438, 6339.05888148, 6361.1923679 ,
       6388.86420647, 6781.76845424, 6554.88195643, 6565.94537929,
       6339.05888148, 6416.52940432, 6073.43048154, 5929.546218  ,
       5935.07792943, 5973.81983156, 6145.36929295, 6294.78526792,
       6361.1923679 , 6305.85533149, 5979.35154299, 5918.47615443,
       5769.06017946, 5708.19143162, 5636.24597949, 5868.67082944,
       6018.08680441, 5940.61628157, 5763.52846804, 5846.53734302,
       5935.07792943, 5807.8020816 , 5470.22822953, 5686.05130448,
       5757.99675661, 5863.13911801, 5730.32491804, 5910.01588989,
       5442.56303168, 5459.16480667, 5575.37723165, 5564.30716808,
       5486.83000453, 5658.38610663, 5924.01450657, 5686.05130448,
       4800.62551825, 5193.53640673, 5424.27451634, 5531.10361809,
       5597.51071807, 5553.23710451, 5493.57032517, 5536.63532951,
       5686.05130448, 5049.65214319, 4330.24410692, 4479.66008189,
       4584.80244329, 4861.5009068 , 4944.50978178, 5165.86456817,
       5387.21935455, 5259.94350672, 5243.34173172, 5425.96125668,
       5525.57190666, 5442.56303168, 5431.49296811, 5309.7488317 ,
       4961.11155678, 3937.33321843, 3583.16423208, 3765.78375704,
       4429.8547569 , 4789.55545468, 4496.26185688, 4606.93592972,
       4678.88138184, 4806.15722967, 5005.37852963, 4977.71333178,
       4822.75900467, 5127.12930674, 4872.56432966, 4579.27073186,
       4612.47428186, 4761.89025682, 4750.82019326, 4812.17371293,
       4374.51107977, 4053.54564341, 4020.34209342, 4014.81038199,
       4291.50220479, 4346.84588191, 4429.8547569 , 4335.77581835,
       3411.60812998, 2902.49145723, 3334.13760714, 3472.48351854,
       3876.45782988, 3721.51014348, 3926.26315487, 3893.05960487,
       3383.94293213, 3527.82055495, 3837.72256846, 3926.26315487,
       3870.92611845, 3738.11191848, 3367.34115713, 3583.16423208,
       4142.08622982, 4734.21841826, 4734.21841826, 4247.23523194,
       3483.5535821 , 3035.3056572 , 3400.54470712, 4291.50220479,
       4186.35984338, 3981.60683199, 4142.08622982, 3693.83830492,
       3704.90836849, 3710.44007991, 4263.83700693, 4558.09350768,
       4258.29865479, 3372.87286856, 3289.86399358, 3300.93405714,
       3201.32340717, 3411.60812998, 3776.8471799 , 4285.97049336,
       3937.33321843, 2979.96198007, 3145.97973004, 3040.83736863,
       2681.13003014, 2797.34245511, 3090.64269362, 3323.06754357,
       4020.34209342, 3704.90836849, 2930.15665508, 2797.34245511,
       3051.9074322 , 3776.8471799 , 4059.07735484, 4025.87380484,
       3693.83830492, 3223.45689359, 3039.39633456, 3351.21751325,
       3372.87286856, 3201.0777009 , 2863.7495551 , 3284.33228215,
       3937.33321843, 3671.70481849, 3027.84149916, 2210.74861881,
       1928.51844387, 2979.96198007, 3433.74825711, 3826.65250489,
       3455.88174354, 2703.27015727, 3264.60273274, 3035.3056572 ,
       3754.71369347, 3035.3056572 , 2409.96991877, 2321.42933236,
       2476.37701875, 3691.91249902, 3228.98860502, 2476.37701875,
       2658.99654371, 2365.69630521, 2293.75749379, 2664.52825514,
       3489.08529353, 3168.11985717, 3483.5535821 , 4036.94386841,
       3311.99748   , 3090.64269362, 3002.10210721, 3804.51901846,
       4330.24410692, 3865.39440702, 3295.395705  , 2968.89855722,
       2979.96198007, 3091.36653101, 3566.56245709, 2919.09323223,
       2974.43026864, 3079.57263005, 2703.27015727, 2061.33264384,
       2692.20009371, 3334.13760714, 3527.82055495, 3317.53583214,
       3494.61700496, 3516.7571321 , 3074.04091862, 3074.04091862,
       3126.01111507, 3843.25427988, 4230.63345694, 3920.73144344,
       3145.97973004, 3068.50920719, 3649.57133207, 3599.76600708,
       3501.35732561, 4440.91817975, 3561.02410495, 3965.005057  ,
       3378.40457999, 2830.54600511, 2930.15665508, 3898.59795701,
       4717.61664326, 3942.86492986, 3123.84624361, 3616.84591319,
       4313.64233192, 4966.64326821, 5016.4485932 , 4916.83794322,
       4108.88267983, 4629.07605685, 4352.37759334, 4833.82906824,
       4938.97142965, 4745.28848183, 4894.7044568 , 5210.13818173,
       4551.5988933 , 4119.95274339, 4175.28977981, 3361.80280499,
       4429.8547569 , 4496.26185688, 3671.70481849, 4031.41215698,
       4042.47557984, 4096.13251663, 4313.64233192, 4811.69558181,
       4103.3509684 , 3893.05960487, 4119.95274339, 4534.9971183 ,
       4463.05830689, 4180.82813195, 3530.23113268, 3854.32434345,
       4153.15629339, 4501.79356831, 5243.34173172, 5625.18255664,
       5254.40515458, 4291.50220479, 4523.92705474, 4712.08493184,
       4999.8468182 , 3848.79263202, 3350.73938213, 3959.46670486,
       4380.04943191, 4523.92705474, 4252.76694337, 3715.97843205,
       4258.29865479, 4297.04055693, 5287.60870457, 4961.11155678,
       4933.43971822, 5381.68764312, 5342.9523817 , 4950.04149321,
       4784.02374325, 5077.32398176, 5033.05036819, 4573.73237973,
       4756.35190469, 4961.11155678, 5282.07699314, 5021.98030463,
       5276.54528171, 5442.56303168, 5154.7945046 , 4961.11155678,
       5199.06811816, 5337.41402956, 5187.99805459, 5298.67876814,
       5342.9523817 , 5564.30716808, 5730.32491804, 5813.33379302,
       5796.73201803, 5946.147993  , 6012.55509298, 5575.37723165,
       5730.32491804, 5558.77545665, 5088.38740461, 5215.66989315,
       5182.46634316, 4806.15722967, 4894.7044568 , 5215.66989315,
       5525.57190666, 5935.07792943, 6040.22693154, 6001.48502941,
       5553.23710451, 5569.83887951, 5520.03355452, 5459.16480667,
       5409.35948168, 5149.26279317, 4988.77675463, 5785.66195446,
       6410.99769289, 6565.94537929, 6377.7941429 , 6073.43048154,
       6150.90100438, 5968.28147942, 5403.82112954, 5846.53734302,
       6189.64290651, 6754.10325639, 6294.78526792, 6632.35247928,
       6405.46598146, 6399.92762933, 6455.27130645, 6709.82964283,
       6715.36135426, 6936.72278135, 6676.62609284, 5935.07792943,
       6001.48502941, 5973.81983156, 5968.28147942, 6073.43048154,
       5885.27260444, 6167.50277938, 6283.71520435, 6648.95425427,
       6482.93650431, 6327.98881791, 5636.24597949, 5171.39627959,
       5647.31604306, 6139.83758152, 6200.70632937, 6023.62515655,
       6372.26243147, 6405.46598146, 6233.90987936, 6007.02338155,
       6067.8921294 , 5951.67970442, 5979.35154299, 6211.77639294,
       6300.31697935, 6482.93650431, 6322.45710648, 6211.77639294,
       6101.09567939, 6195.17461794, 6233.90987936, 5968.28147942,
       5813.33379302, 5868.67082944, 6001.48502941, 6040.22693154,
       5907.41273157, 5990.42160656, 6018.08680441, 5719.25485447,
       5437.02467954, 5437.02467954, 5525.57190666, 5647.31604306,
       5835.46727945, 5879.74089301, 5608.58078164, 5553.23710451,
       5885.27260444, 6051.2903544 , 5763.52846804, 5907.41273157,
       6289.25355649, 6217.30810437, 5841.00563159, 5912.944443  ,
       6034.68857941, 6106.63403153, 5841.00563159, 5885.27260444,
       5591.97900664, 5265.47521814, 5088.38740461, 5049.65214319,
       5193.53640673, 5282.07699314, 5420.42290454, 5254.40515458,
       5066.25391819, 5071.78562962, 5354.01580455, 4883.63439323,
       4844.89913181, 5193.53640673, 5531.10361809, 4728.68670683,
       4629.07605685, 4867.03261823, 5431.49296811, 5531.10361809,
       5326.3506067 , 4817.22729324, 4712.08493184, 4673.3430297 ,
       5138.1927296 , 5580.90894308, 5580.90894308, 5298.67876814,
       4894.7044568 , 3976.06847986, 3760.2454049 , 4180.82813195,
       4629.07605685, 4103.3509684 , 4119.95274339, 3826.65250489,
       4678.88138184, 4944.50978178, 4324.70575478, 4241.6968798 ,
       4684.41309327, 4955.57320464, 4429.8547569 , 4297.04055693,
       4451.98824332, 4828.29735681, 5116.05924318, 4867.03261823,
       4839.36077967, 4734.21841826, 4385.58114334, 4136.55451839,
       3327.59650779, 3588.69594351, 3644.03297993, 3572.09416851,
       3494.61700496, 3378.40457999, 3334.13760714, 3079.57263005,
       3179.18328003, 3553.32752206, 3616.36778207, 3798.98730703,
       4009.27202985, 4435.38646833, 3494.61700496, 3134.91630718,
       3350.73938213, 3505.68706853, 3372.87286856, 3489.08529353,
       3743.64362991, 3699.37665706, 3561.02410495, 3472.48351854,
       3660.63475492, 3062.97085506, 2847.1477801 , 3295.395705  ,
       3151.51808218, 3184.72163217, 3079.81833632, 3195.78505503,
       3195.78505503, 3522.28884353, 4219.56339337, 4374.51107977,
       4125.48445482, 2913.55488009, 3345.20102999, 3749.18198205,
       3765.78375704, 4108.88267983, 3561.02410495, 3190.2533436 ,
       3179.18328003, 3085.11098219, 3367.34115713, 3621.8994935 ,
       3826.65250489, 3942.86492986, 3422.67819355, 3406.07641855,
       3383.94293213, 2979.96198007, 2847.1477801 , 2750.66490453,
       3149.10750445, 2830.54600511, 2902.49145723, 2896.95310509,
       2896.95310509, 2913.55488009, 2647.92648015])
# Reusable wrapper around a line plot
def lineplot(x_data,y_data,x_label,y_label,title):
    """Draw y_data against x_data as a single line on a new figure.

    x_label, y_label and title are applied to the new axes. Nothing is
    returned.
    """
    _, axes = plt.subplots()
    # Axis descriptions and title
    axes.set_title(title)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    # The curve itself: width 2, fully opaque
    line_style = dict(lw=2, color="#539caf", alpha=1)
    axes.plot(x_data, y_data, **line_style)
    
# Call the plotting helper with the fitted values (title typo "Beat" corrected to "Best")

lineplot(x_data=daily_data['temp'],y_data=fitted_values,x_label="Normalized temperature (c)",y_label="Check outs",title="Line of Best Fit for Number of Check Outs VS Temperature")

# Curve with a confidence interval
# Columns 4 and 5 of `data` are used as the lower and upper bounds
predict_mean_ci_low = data[:,4]
predict_mean_ci_upp = data[:,5]
# Collect x values and both bounds in one DataFrame, ordered by x_data
CI_df = pd.DataFrame({
    "x_data": daily_data["temp"],
    "low_CI": predict_mean_ci_low,
    "upper_CI": predict_mean_ci_upp,
})
CI_df = CI_df.sort_values("x_data")



# Line plot with a shaded confidence band
def lineplotCI(x_data,y_data,sorted_x,low_CI,upper_CI,x_label,y_label,title):
    """Draw a fitted line together with a shaded band on a new figure.

    The line is y_data against x_data (legend entry "Fit"); the band is
    filled between low_CI and upper_CI over sorted_x (legend entry
    "95% CI"). Nothing is returned.
    """
    band_colour = "#539caf"
    _, axes = plt.subplots()
    # Prediction line
    axes.plot(x_data, y_data, lw=1, color=band_colour, alpha=1, label="Fit")
    # Band between the two bounds, filled along sorted_x
    axes.fill_between(sorted_x, low_CI, upper_CI,
                      color=band_colour, alpha=0.4, label="95% CI")

    # Axis descriptions and title
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)

    # Legend built from the label= arguments; "best" lets matplotlib choose the spot
    axes.legend(loc="best")

# Title typo "Beat" corrected to "Best".
lineplotCI(x_data=daily_data['temp'],y_data=fitted_values,sorted_x=CI_df['x_data'],low_CI=CI_df['low_CI'],upper_CI=CI_df['upper_CI'],x_label="Normalized temperature (c)",y_label="Check outs",title="Line of Best Fit for Number of Check Outs VS Temperature")


# Line plot with two y axes
def lineplot2y(x_data,x_label,y1_data,y1_color,y1_label,y2_data,y2_color,y2_label,title):
    """Draw two series over the same x values, each with its own y axis.

    y1_data is drawn on the left-hand axis and y2_data on a twin axis
    created with twinx(); each y label uses the colour of its series.
    Nothing is returned.
    """
    _, left_axis = plt.subplots()
    left_axis.plot(x_data, y1_data, color=y1_color)
    # Title and axis descriptions for the primary axes
    left_axis.set_title(title)
    left_axis.set_xlabel(x_label)
    left_axis.set_ylabel(y1_label, color=y1_color)

    # Second axes object sharing the x axis with the first
    right_axis = left_axis.twinx()
    right_axis.plot(x_data, y2_data, color=y2_color)
    right_axis.set_ylabel(y2_label, color=y2_color)
    
# Call the plotting helper
# Wind speed and rental count over time (axis label typo "widspeed" corrected)
lineplot2y(x_data=daily_data['dteday'],x_label='Day',y1_data=daily_data['cnt'],y1_color="#539caf",y1_label="Check outs",y2_data=daily_data['windspeed'],y2_color="#7663b0",y2_label='Normalized windspeed',title="Check Outs and Windspeed Over Time")

分布分析

  • 直方图
    • 粗略区间计算
# Helper that draws a histogram
def histogram(data,x_label,y_label,title,bins=10):
    """Draw a histogram of data on a new figure.

    Args:
        data: values to bin.
        x_label, y_label, title: texts applied to the axes.
        bins: forwarded to Axes.hist; defaults to 10, the value that was
            previously hard-coded.

    Returns:
        Whatever Axes.hist returns for the drawn histogram.
    """
    _,ax = plt.subplots()
    res = ax.hist(data,color="#539caf",bins=bins)
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    return res
#Call the plotting helper
#Distribution of the number of bikes rented by registered (app) users.

res = histogram(data=daily_data['registered'],x_label='Check outs',y_label='Frequency',title="Distribution of Registered Check Outs")

# Between-group analysis:
#     quantitative comparison between groups
#     grouping granularity
#     clustering between groups
# Bar chart
# Mean and standard deviation of the rental count for each weekday.
# The aggregations are named by string: passing np.mean / np.std to agg is
# deprecated in recent pandas, while "mean" / "std" give the same result and
# the same column names.

mean_total_co_day = daily_data[["weekday","cnt"]].groupby("weekday").agg(["mean","std"])
# Drop the outer "cnt" level so the columns are simply "mean" and "std"
mean_total_co_day.columns = mean_total_co_day.columns.droplevel()

# Helper that draws a bar chart with error bars
def barplot(x_data,y_data,error_data,x_label,y_label,title):
    """Draw y_data as bars at x_data with error bars of size error_data.

    x_label, y_label and title are applied to the new axes. Nothing is
    returned.
    """
    _, axes = plt.subplots()
    # Bars, centred on their x positions
    axes.bar(x_data, y_data, color="#539caf", align="center")

    # Error bars on top of the bars; ls="none" suppresses the line that
    # would otherwise connect neighbouring points
    error_style = dict(color="#297083", ls="none", lw=5)
    axes.errorbar(x_data, y_data, yerr=error_data, **error_style)

    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)

#Call the plotting helper: per-weekday mean as bar height, per-weekday std as error bar
barplot(x_data=mean_total_co_day.index.values,y_data=mean_total_co_day["mean"],error_data=mean_total_co_day['std'],x_label="Day of week",y_label='Check outs',title="Total Check Out By Day of Week(0 = Sunday)")

# Stacked bar chart data:
# per-weekday mean of registered and casual rentals, their sum, and the
# share of each user type in that sum.
mean_by_reg_co_day = (
    daily_data[['weekday','registered','casual']]
    .groupby('weekday')
    .mean()
    .assign(total=lambda df: df['registered'] + df['casual'])
    .assign(
        reg_prop=lambda df: df['registered'] / df['total'],
        casual_prop=lambda df: df['casual'] / df['total'],
    )
)

# Helper that draws a stacked bar chart
def stackedbarplot(x_data,y_data_list,y_data_names,colors,x_label,y_label,title):
    """Draw the series in y_data_list stacked on top of each other.

    Args:
        x_data: bar positions.
        y_data_list: one sequence of bar heights per category.
        y_data_names: legend label for each category.
        colors: bar colour for each category.
        x_label, y_label, title: texts applied to the axes.

    Each category starts where the combined height of ALL earlier
    categories ends. The earlier version used only the immediately
    preceding series as the base, which overlaps bars as soon as there are
    three or more categories.
    """
    _,ax=plt.subplots()
    # Running top of the stack at every x position; starts at zero.
    stack_top = np.zeros(len(x_data))
    for i in range(0,len(y_data_list)):
        heights = np.asarray(y_data_list[i],dtype=float)
        ax.bar(x_data,heights,color=colors[i],bottom=stack_top,align='center',label=y_data_names[i])
        stack_top = stack_top + heights
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc='upper right')  # legend position

stackedbarplot(x_data=mean_by_reg_co_day.index.values,
               y_data_list=[mean_by_reg_co_day['reg_prop'],mean_by_reg_co_day['casual_prop']],
               y_data_names=["Registered","Casual"],
               colors=['#539caf','#7663b0'],
               x_label="Day of week",
               y_label='Proportion of check out',
               title='checkOut by Registration status and day of week (0 = Sunday)')

#Author's reading of the chart: registered users rent more bikes than unregistered ones; on Saturday and Sunday the share of unregistered users goes up.

原文地址:https://www.cnblogs.com/xujunkai/p/12129818.html