Python 数据可视化

安装Matplotlib 库

python -m pip install matplotlib

绘制简单折线图

# Import Matplotlib's pyplot interface.
import matplotlib.pyplot as plt

squares = [1, 4, 9, 16, 25]

# fig is the whole figure; ax holds the chart(s) inside it.
# subplots() can lay out one or more charts in a single figure.
fig, ax = plt.subplots()

# Plot the given data.
ax.plot(squares)

# Open the Matplotlib viewer and display the chart.
plt.show()

修改标签文字和线条粗细的复杂折线图

import matplotlib.pyplot as plt

# 导入matplotlib的pyplot

 

input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

# Use a built-in style sheet. Matplotlib 3.6 renamed the bundled seaborn
# styles to "seaborn-v0_8*" and 3.8 removed the old names, so pick whichever
# name this installation actually provides.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)

# These two settings have to be made by hand so Chinese labels render.
# They are applied after style.use() so the style sheet cannot override them.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used for Chinese labels
plt.rcParams['axes.unicode_minus'] = False

# fig is the whole figure; ax holds the chart(s) inside it.
fig, ax = plt.subplots()

# Plot squares against input_values with a thicker line.
ax.plot(input_values, squares, linewidth=3)

# Chart title and axis labels.
ax.set_title("平方数", fontsize=24)
ax.set_xlabel("值", fontsize=14)
ax.set_ylabel("值的平方", fontsize=14)

# Size of the tick labels on both axes.
ax.tick_params(axis='both', labelsize=14)

# Open the Matplotlib viewer and display the chart.
plt.show()

散点图

import matplotlib.pyplot as plt

# 导入matplotlib的pyplot

 

x_values = range(1, 100)
y_values = [x**2 for x in x_values]

# Use a built-in style sheet. Matplotlib 3.6 renamed the bundled seaborn
# styles to "seaborn-v0_8*" and 3.8 removed the old names, so pick whichever
# name this installation actually provides.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)

# These two settings have to be made by hand so Chinese labels render.
# They are applied after style.use() so the style sheet cannot override them.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used for Chinese labels
plt.rcParams['axes.unicode_minus'] = False

# fig is the whole figure; ax holds the chart(s) inside it.
fig, ax = plt.subplots()

# Single-colour alternative (c = point colour, s = point size):
# ax.scatter(x_values, y_values, c='red', s=3)

# Draw the scatter plot; c=y_values with a colormap shades each point by its
# y value, from light to dark.
ax.scatter(x_values, y_values, c=y_values, cmap=plt.cm.Blues, s=3)

# Chart title and axis labels.
ax.set_title("平方数", fontsize=24)
ax.set_xlabel("值", fontsize=14)
ax.set_ylabel("值的平方", fontsize=14)

# Size of the tick labels.
ax.tick_params(axis='both', which='major', labelsize=14)

# Value range of each axis: [xmin, xmax, ymin, ymax].
ax.axis([0, 110, 0, 11000])

# plt.show()
# Save the figure automatically. Saving after show() would write a different,
# usually blank, figure (or the program may be interrupted in show()), so
# show() stays disabled here.
# First argument: file name; bbox_inches='tight' trims surplus whitespace
# around the chart.
plt.savefig('squares_plot.png', bbox_inches='tight')

随机漫步

from random import choice

import matplotlib.pyplot as plt

 

class RandomWalk:
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Record the target number of points and start the walk at (0, 0)."""
        self.num_points = num_points
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append random steps until the walk holds num_points points."""
        signs = (1, -1)
        lengths = (0, 1, 2, 3, 4)
        xs, ys = self.x_values, self.y_values

        while len(xs) < self.num_points:
            # For each axis, draw a direction first and then a distance.
            dx = choice(signs) * choice(lengths)
            dy = choice(signs) * choice(lengths)

            # A step that moves nowhere is discarded and drawn again.
            if dx or dy:
                # The next point is the previous point plus the step.
                xs.append(xs[-1] + dx)
                ys.append(ys[-1] + dy)

 

if __name__ == '__main__':
    # The style is the same for every walk, so select it once up front.
    plt.style.use('classic')

    # Keep drawing new walks until the user answers "n".
    while True:
        rw = RandomWalk(50_000)
        rw.fill_walk()

        # figsize sets the size of the figure, dpi its resolution.
        fig, ax = plt.subplots(figsize=(6, 4.5), dpi=128)

        # Colour each point by its position in the walk (light to dark).
        # edgecolor='none' removes the outline around every point.
        point_numbers = range(rw.num_points)
        ax.scatter(rw.x_values, rw.y_values, c=point_numbers,
                   cmap=plt.cm.Blues, edgecolor='none', s=1)

        # Emphasise the start point (green) and the end point (red).
        ax.scatter(0, 0, c='green', edgecolor='none', s=100)
        ax.scatter(rw.x_values[-1], rw.y_values[-1], c='red',
                   edgecolor='none', s=100)

        # Hide both axes.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        plt.show()

        # Ignore surrounding whitespace and letter case so "N" or " n" also
        # ends the loop.
        keep_running = input("Make another walk?(y/n): ")
        if keep_running.strip().lower() == 'n':
            break

使用Plotly模拟投骰子
- 安装plotly

python -m pip install plotly

程序代码

from random import randint

# 导入绘制直方图的库

from plotly.graph_objs import Bar,Layout

from plotly import offline

 

 

class Die:
    """A single die with a configurable number of faces."""

    def __init__(self, num_sides=6):
        """Store the face count; a die has six faces unless told otherwise."""
        self.num_sides = num_sides

    def roll(self):
        """Return a random face value from 1 through num_sides."""
        faces = self.num_sides
        return randint(1, faces)

 

 

if __name__ == '__main__':
    # Set up the two dice and the number of throws.
    die_1 = Die()
    die_2 = Die(8)
    num_rolls = 50_000

    # Throw both dice num_rolls times and record each sum.
    results = [die_1.roll() + die_2.roll() for _ in range(num_rolls)]
    # print(results)

    # Analyse the results: count how often every possible sum occurred.
    max_result = die_1.num_sides + die_2.num_sides
    x_values = list(range(2, max_result + 1))
    frequencies = [results.count(value) for value in x_values]
    print(frequencies)

    # Visualise the results as a bar chart.
    data = [Bar(x=x_values, y=frequencies)]

    x_axis_config = {'title': '结果', 'dtick': 1}  # dtick: spacing of the x-axis ticks
    y_axis_config = {'title': '结果的频率'}

    # Build the title from the actual dice and roll count so it cannot drift
    # out of sync with the simulation above.
    chart_title = (
        f'掷一个D{die_1.num_sides}和一个D{die_2.num_sides} '
        f'{num_rolls}次的结果'
    )
    my_layout = Layout(title=chart_title, xaxis=x_axis_config, yaxis=y_axis_config)

    # Write the chart to d6_d8.html; the relative file name is resolved
    # against the current working directory.
    offline.plot({'data': data, 'layout': my_layout}, filename='d6_d8.html')

处理CSV文件数据

import csv

from datetime import datetime

 

import matplotlib.pyplot as plt

 

filename = 'weather.csv'  # columns: location, date, high, low, mean temperature

# newline='' is the way the csv module documents opening files for csv.reader.
with open(filename, encoding='UTF-8', newline='') as f:
    reader = csv.reader(f)
    reader_row = next(reader)  # consume the header row

    # enumerate() on the header gives each column's index and name:
    # for index, column_header in enumerate(reader_row):
    #     print(index, column_header)

    dates, highs, lows = [], [], []
    for row in reader:
        # Blank lines come back as empty rows; skip them instead of failing
        # on row[1].
        if not row:
            continue

        current_date = datetime.strptime(row[1], '%Y-%m-%d')
        try:
            high = int(row[2])  # column indexes start at 0
            low = int(row[3])
        except ValueError:
            # An empty temperature field cannot be converted; report the day
            # and leave it out of all three lists.
            print(f"Missing data for {current_date}")
        else:
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

    print(dates)
    print(highs)

# Use a built-in style sheet. Matplotlib 3.6 renamed the bundled seaborn
# styles to "seaborn-v0_8*" and 3.8 removed the old names, so pick whichever
# name this installation actually provides.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)

# These two settings have to be made by hand so Chinese labels render.
# They are applied after style.use() so the style sheet cannot override them.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used for Chinese labels
plt.rcParams['axes.unicode_minus'] = False

# Plot the highs and lows and shade the band between them.
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red', alpha=0.5)  # alpha is the opacity; 0 is fully transparent
ax.plot(dates, lows, c='green', alpha=0.5)
ax.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)  # fill the area between the lines

# Format the chart.
ax.set_title('每日最高温度', fontsize=24)
ax.set_xlabel('', fontsize=16)
fig.autofmt_xdate()
ax.set_ylabel("温度（C）", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()

数据文件 weather.csv（UTF-8 编码）

地点,日期,最高温度,最低温度,平均温度

武汉,2020-05-11,26,24,25

武汉,2020-05-12,32,25,26

武汉,2020-05-13,40,20,27

武汉,2020-05-14,37,21,28

武汉,2020-05-15,26,21,29

武汉,2020-05-16,38,32,30

武汉,2020-05-17,22,20,31

武汉,2020-05-18,28,27,32

武汉,2020-05-19,32,29,33

武汉,2020-05-20,34,27,25

武汉,2020-05-21,29,26,25

武汉,2020-05-22,33,28,26

武汉,2020-05-23,34,30,27

武汉,2020-05-24,,21,28

武汉,2020-05-25,34,31,25

处理json格式

主要利用之前介绍的 json.load() 和 json.dump() 加载和写入数据，然后根据字典的特性直接提取数据，并使用 Plotly 绘图

最后了解一下pandas 数据分析工具

import pandas as pd

# Combine the four sequences into one table, one row per entry.
# lons, lats, titles and mags are expected to be defined earlier.
columns = ['经度', '纬度', '位置', '震级']
rows = zip(lons, lats, titles, mags)
data = pd.DataFrame(data=rows, columns=columns)

使用API
- 使用 Web API：Web API 是网站的一部分，用于与使用具体 URL 请求特定信息的程序交互

　　　　　　https://api.github.com/search/repositories?q=language:python&sort=stars

安装 Requests 包，让 Python 程序能够轻松地向网站请求信息并检查返回的响应

python -m pip install requests

监视API的速率限制

　　　　　　https://api.github.com/rate_limit

调用github API并将其中的数据进行可视化

import requests

# 导入plotly

from plotly.graph_objs import Bar

from plotly import offline

 

# 执行API调用并存储响应

# Make the API call and store the response.
# sort=stars asks the search endpoint to order the results by star count;
# a bare "&stars" is not a recognised parameter.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, headers=headers, timeout=10)
print(f"Status code: {r.status_code}")
# Stop with a clear HTTP error (for example when rate limited) instead of a
# KeyError on the lookups below.
r.raise_for_status()

# Convert the API response to a dictionary.
response_dict = r.json()
print(f"Total repositories:{response_dict['total_count']}")

# Explore the information about the repositories.
repo_dicts = response_dict['items']
print(f"Repositories returned:{len(repo_dicts)}")

# Examine the first repository:
# repo_dict = repo_dicts[0]
# print(f"\nKeys:{len(repo_dict)}")
# for key in sorted(repo_dict.keys()):
#     print(key)

# Summarise the most popular repositories:
# print("\n Selected information about each repository:")
# for repo_dict in repo_dicts:
#     print(f"\nName:{repo_dict['name']}")
#     print(f"Owner:{repo_dict['owner']['login']}")
#     print(f"Stars:{repo_dict['stargazers_count']}")
#     print(f"Repository:{repo_dict['html_url']}")
#     print(f"Created:{repo_dict['created_at']}")
#     print(f"Updated:{repo_dict['updated_at']}")
#     print(f"Description:{repo_dict['description']}")

# Process the results: one clickable link, star count and hover label per
# repository.
repo_links, stars, labels = [], [], []
for repo_dict in repo_dicts:
    repo_name = repo_dict['name']
    repo_url = repo_dict['html_url']
    repo_link = f"<a href='{repo_url}'>{repo_name}</a>"
    repo_links.append(repo_link)

    stars.append(repo_dict['stargazers_count'])

    owner = repo_dict['owner']['login']
    description = repo_dict['description']
    label = f"Owner:{owner}<br />Description:{description}"
    labels.append(label)

# Build the visualisation.
data = [{
    'type': 'bar',
    'x': repo_links,
    'y': stars,
    'hovertext': labels,
    'marker': {
        'color': 'rgb(60,100,150)',
        'line': {'width': 1.5, 'color': 'rgb(25,25,25)'},  # bar outline
    },
    'opacity': 0.6,  # transparency of the bars
}]

# Title fonts are given through the nested title.font attribute; the older
# "titlefont" shortcut is deprecated and rejected by recent Plotly releases.
my_layout = {
    'title': {
        'text': 'Github 上最受欢迎的Python项目',
        'font': {'size': 28},
    },
    'xaxis': {
        'title': {'text': 'Repository', 'font': {'size': 24}},
        'tickfont': {'size': 14},  # font size of the tick labels
    },
    'yaxis': {
        'title': {'text': 'Stars', 'font': {'size': 24}},
        'tickfont': {'size': 14},
    },
}

fig = {'data': data, 'layout': my_layout}
offline.plot(fig, filename='python_repos.html')