Python之爬取天气预报并生成图表

  使用Python爬虫从天气预报网站爬取天气数据并存储至MySQL,然后使用pyecharts绘制图表

  本次代码可以在gitee下载https://gitee.com/liuyueming/weatherSpider.git

  一,环境查看

  Python版本

C:\Users\liuym\Desktop\weatherSpider>python --version
Python 3.6.6

  MySQL版本

 mysql --version
mysql  Ver 14.14 Distrib 5.7.22, for Linux (x86_64) using  EditLine wrapper

  二,代码

  安装模块

pip3 install pymysql
pip3 install bs4
pip3 install lxml
pip3 install requests
pip3 install pyecharts

  运行过程中遇到没有安装的库使用pip install安装即可

  本次爬取的天气预报网站为 http://www.tianqihoubao.com/  

  主程序main.py

import pymysql
import requests
from bs4 import BeautifulSoup
 
# Open the MySQL connection used by the crawl below; adjust the credentials
# and database name to match the local server.
db = pymysql.connect(
    host="localhost",
    user="root",
    passwd="123456",
    db="weather",
    charset='utf8',
)
cursor = db.cursor()
 
# Fetch a web page and parse it.
def get_html(url, timeout=10):
    """Download *url* and return it parsed as a ``BeautifulSoup`` document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl forever.

    Returns:
        A ``BeautifulSoup`` tree built with the ``lxml`` parser.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page as data.
    response.raise_for_status()
    # Decode with the charset detected from the body rather than the one
    # taken from the HTTP headers.
    response.encoding = response.apparent_encoding
    return BeautifulSoup(response.text, 'lxml')
 
# Years and months to crawl; every year/month pair maps to one monthly page.
year = ['2020']

month = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

# Page identifiers such as '202001', in the form the site's URLs use.
time = [y + x for y in year for x in month]

# Parameterized insert; built once because it does not change between rows.
sql = (
    "insert into weather_spider(time_local, link, weather_type, temperature, wind_power) "
    "values(%s, %s, %s, %s, %s)"
)

try:
    for date in time:
        url = 'http://www.tianqihoubao.com/lishi/nanchang/month/' + date + '.html'
        soup = get_html(url)
        sup = soup.find('table', attrs={'class': 'b'})
        if sup is None:
            # No data table on this page: report it and move on to the next month.
            print('未找到天气表格,跳过: ' + url)
            continue
        # The first row is skipped (the original treats it as the header).
        for trl in sup.find_all('tr')[1:]:
            td = trl.find_all('td')
            if len(td) < 4:
                continue
            anchor = td[0].find('a')
            if anchor is None:
                continue
            href = anchor['href']  # link to the daily detail page
            title = anchor['title']  # title text, stored as time_local
            weather = td[1].get_text().replace('\r\n', '').replace(' ', '')  # weather condition
            wendu = td[2].get_text().strip().replace(' ', '').replace('\r\n', '')  # temperature
            fengli = td[3].get_text().strip().replace(' ', '').replace('\r\n', '')  # wind power

            cursor.execute(sql, (title, href, weather, wendu, fengli))
        # Commit once per page rather than once per row.
        db.commit()
finally:
    # Release the cursor and the connection even if a page fails.
    cursor.close()
    db.close()
print('爬取完成')

  代码解析

db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8' ) # 数据库连接信息,根据实际情况修改

  

year = ['2020'] # 需要爬取的年份信息

  

url = 'http://www.tianqihoubao.com/lishi/nanchang/month/'+ date +'.html' # 需要爬取的城市信息 本次为南昌

  生成html程序myVisualize.py

import pymysql
import pyecharts.options as opts
from pyecharts.charts import Line, Pie

def create_temp():
    """Render the daily high/low temperature line chart to an HTML file.

    Reads every row of the ``weather_spider`` table, takes the date label
    from the first 11 characters of column 0 and the high/low values from
    column 3 (split on ``/`` with the ``℃`` unit removed), then writes the
    chart to ``南昌2020气温变化表.html`` in the current working directory.

    Rows with a missing date or temperature, or a temperature without a
    ``/`` separator, are skipped instead of aborting the whole chart.
    """
    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
    try:
        # The cursor is closed by the ``with`` block before the connection is.
        with db.cursor() as cursor:
            cursor.execute('SELECT * FROM weather_spider;')
            data = cursor.fetchall()
    finally:
        # Release the connection even if the query fails.
        db.close()

    max_temp_list = []
    min_temp_list = []
    day_list = []
    for d in data:
        day, temperature = d[0], d[3]
        if not day or not temperature:
            continue
        parts = temperature.split('/')
        if len(parts) < 2:
            continue
        max_temp_list.append(parts[0].replace('℃', ''))
        min_temp_list.append(parts[1].replace('℃', ''))
        day_list.append(day[:11])

    line = Line()
    line.add_xaxis(day_list)
    # Both series share the same styling: max/min mark points and an average line.
    for series_name, values in (("最高气温", max_temp_list), ("最低气温", min_temp_list)):
        line.add_yaxis(
            series_name=series_name,
            y_axis=values,
            is_symbol_show=False,
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_="max", name="最大值"),
                    opts.MarkPointItem(type_="min", name="最小值"),
                ]
            ),
            markline_opts=opts.MarkLineOpts(
                data=[opts.MarkLineItem(type_="average", name="平均值")]
            ),
        )
    line.set_global_opts(
        yaxis_opts=opts.AxisOpts(name="温度(℃)"),
        title_opts=opts.TitleOpts(title="南昌气温变化表"),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
    )

    line.render('南昌2020气温变化表.html')
    print('气温图生成成功')

def create_weather():
    """Render a pie chart of weather-type frequencies to an HTML file.

    For each keyword in ``attr`` this counts the ``weather_spider`` rows whose
    ``weather_type`` contains that keyword, then writes the chart to
    ``南昌2020天气占比表.html`` in the current working directory.

    Each keyword is counted independently with a substring match, so a row
    whose ``weather_type`` contains several keywords is counted once per
    keyword.
    """
    attr = ["雨", "多云", "晴", "阴", "雪", "雾", "霾"]
    count_sql = 'SELECT COUNT(*) FROM weather_spider WHERE weather_type LIKE %s;'

    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
    try:
        # The cursor is closed by the ``with`` block before the connection is.
        with db.cursor() as cursor:
            counts = []
            for keyword in attr:
                # Let the server count instead of fetching every matching row.
                cursor.execute(count_sql, ('%' + keyword + '%',))
                counts.append(cursor.fetchone()[0])
    finally:
        # Release the connection even if a query fails.
        db.close()

    pie = (
        Pie()
        .add("", [list(z) for z in zip(attr, counts)])
        .set_global_opts(title_opts=opts.TitleOpts(title="天气占比表"))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )

    pie.render('南昌2020天气占比表.html')
    print('天气图生成成功')


# Build both charts only when this file is run as a script, not on import.
if __name__ == "__main__":
    for build_chart in (create_temp, create_weather):
        build_chart()

  MySQL操作(安装MySQL不详述)

  创建库

create database weather;

  导入表

mysql -uroot -pioYbcZ1u -h127.0.0.1 weather < weather.sql

  表语句sql如下weather.sql

DROP TABLE IF EXISTS `weather_spider`;
CREATE TABLE `weather_spider` (
  `time_local` varchar(255) DEFAULT NULL,
  `link` varchar(255) DEFAULT NULL,
  `weather_type` varchar(255) DEFAULT NULL,
  `temperature` varchar(255) DEFAULT NULL,
  `wind_power` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;

  三,运行

  运行主程序

python main.py

  运行正常会往MySQL数据库写入数据,登录数据库搜索查看

 select * from weather_spider;

   运行生成html程序

python myVisualize.py
气温图生成成功
天气图生成成功

  在当前目录会生成html,打开查看

 

原文地址:https://www.cnblogs.com/minseo/p/15723258.html