Assignment 2

Task 1

(1) Crawl the 7-day weather forecast for a given set of cities from China Weather (http://www.weather.com.cn) and save it to a database.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: xm time:2020/9/30
from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
import urllib.request
import sqlite3


class weatherDB:
    def openDB(self):
        self.con = sqlite3.connect("weather.db")
        self.cursor = self.con.cursor()
        try:
            self.cursor.execute(
                "create table weathers (wcity varchar(16),wdate varchar(16),wweather varchar(64),wtemp varchar(32),constraint pk_weather primary key(wcity,wdate))")
        except sqlite3.OperationalError:
            # The table already exists, so just clear it for this run.
            self.cursor.execute("delete from weathers")

    def closeDB(self):
        self.con.commit()
        self.con.close()

    def insert(self, city, date, weather, temp):
        try:
            self.cursor.execute("insert into weathers (wcity,wdate,wweather,wtemp) values(?,?,?,?)",
                                (city, date, weather, temp))
        except sqlite3.Error as err:
            print("insert failed:", err)

    def show(self):
        self.cursor.execute("select * from weathers")
        rows = self.cursor.fetchall()
        print("%-16s%-16s%-32s%-16s" % ("city", "date", "weather", "temp"))
        for row in rows:
            print("%-16s%-16s%-32s%-16s" % (row[0], row[1], row[2], row[3]))


class weatherforecast:
    def __init__(self):
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3775.400 QQBrowser/10.6.4209.400"}
        self.citycode = {"福州": "101230101", "北京": "101010100", "上海": "101020100", "广州": "101280101", "深圳": "101280601"}

    def forecastcity(self, city):
        if city not in self.citycode:
            print(city + " code not found")
            return
        url = "http://www.weather.com.cn/weather/" + self.citycode[city] + ".shtml"
        try:
            req = urllib.request.Request(url, headers=self.headers)
            data = urllib.request.urlopen(req)
            data = data.read()
            # Let UnicodeDammit guess the page encoding (the site serves utf-8 or gbk).
            dammit = UnicodeDammit(data, ["utf-8", "gbk"])
            data = dammit.unicode_markup
            soup = BeautifulSoup(data, 'html.parser')
            lis = soup.select("ul[class='t clearfix'] li")
            for li in lis:
                try:
                    date_ = li.select('h1')[0].text
                    weather_ = li.select('p[class="wea"]')[0].text
                    # The daytime high (the span) is missing at night, which raises IndexError here.
                    temp_ = li.select('p[class="tem"] span')[0].text + '℃/' + li.select("p[class='tem'] i")[0].text
                    print(city, date_, weather_, temp_)
                    self.db.insert(city, date_, weather_, temp_)
                except Exception as err:
                    print("parse error:", err)
        except Exception as err:
            print("request error:", err)

    def process(self, cities):
        self.db = weatherDB()
        self.db.openDB()
        for city in cities:
            self.forecastcity(city)
        self.db.show()
        self.db.closeDB()

ws = weatherforecast()
ws.process(["福州", "北京", "上海", "广州", "深圳"])
print('completed')


(2) Reflections

I typed this out following the textbook; the new part is saving the results into a database.
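To confirm the rows actually landed in weather.db, the database can be queried on its own afterwards; a minimal sketch against the weathers table created above:

import sqlite3

# Open the database written by weatherDB and list the saved forecasts.
con = sqlite3.connect("weather.db")
for wcity, wdate, wweather, wtemp in con.execute("select * from weathers"):
    print(wcity, wdate, wweather, wtemp)
con.close()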

Task 2

(1) Use the requests and BeautifulSoup libraries to crawl stock information from a target site.

Candidate sites: Eastmoney: https://www.eastmoney.com/

Sina Finance (stocks): http://finance.sina.com.cn/stock/

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: xm time:2020/9/30
import requests
import re

def get_html(url):
    # Spoof a browser user agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
        'accept-language': 'zh-CN,zh;q=0.9'
    }
    response = requests.get(url, headers=headers)  # request the page
    if response.status_code == 200:
        return response.text  # return the page source on success
    else:
        print("Failed to fetch the page!")

print("序号 股票代码   股票名称    最新报价   涨跌幅     跌涨额      成交量     成交额        振幅    最高     最低     今开    昨收")
for j in range(1,30):
    url = 'http://64.push2.eastmoney.com/api/qt/clist/get?cb=jQuery112408828364991523157_1601811976313&pn=' + str(
        j) + '&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:6,m:0+t:13,m:0+t:80,m:1+t:2,m:1+t:23&fields=f2,f3,f4,f5,f6,f7,f8,f9,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22&_=1601811976531'
    html = get_html(url)
    pat = re.compile("[{.*?}]")  # 正则表达式
    data2 = pat.findall(html)
    datas = eval(data2[0])
    i=0
    for data in datas:
        i+=1
        print("{:<4} {:<9} {:<8} {:<9} {:<10} {:<8} {:<8} {:<14} {:<7} {:<7} {:<8} {:<6} {:<6}".format(i,data["f12"],data["f14"],data["f2"],data["f3"],data["f4"],data["f5"],data["f6"],data["f7"],data["f15"],data["f16"],data["f17"],data["f18"],chr(12288)))

(2) Reflections

I learned about packet capture: inspecting the JS requests, working out the format of the JS data they return, and then extracting the needed fields with a regular expression (regex is genuinely hard). I also picked up some json usage, and that paging is controlled by the pn parameter (e.g. pn=2).
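Since the response is JSON wrapped in a jQuery callback, the regex-plus-eval step can also be replaced by stripping the wrapper and calling json.loads; a minimal sketch, assuming the records sit under a data.diff key as in the captured responses:

import json

def parse_callback(html):
    # Strip the jQueryXXX( ... ) wrapper and parse the JSON payload inside.
    body = html[html.find('(') + 1:html.rfind(')')]
    payload = json.loads(body)
    # Assumed layout: the per-stock records live in payload["data"]["diff"].
    return payload["data"]["diff"] if payload.get("data") else []

# Usage with get_html from above:
# datas = parse_callback(get_html(url))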

Task 3

(1) Requirement: pick stocks by a self-chosen 3-digit number plus the last 3 digits of your student ID, and fetch the corresponding stock information. Same packet-capture approach as Task 2.

Candidate sites: Eastmoney: https://www.eastmoney.com/

Sina Finance (stocks): http://finance.sina.com.cn/stock/

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: xm time:2020/10/7
import requests
import re

def get_html(url):
    # Spoof a browser user agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
        'accept-language': 'zh-CN,zh;q=0.9'
    }
    response = requests.get(url, headers=headers)  # request the page
    if response.status_code == 200:
        return response.text  # return the page source on success
    else:
        print("Failed to fetch the page!")

print("股票代码   股票名称  最高     最低     今开")
for j in range(1,30):
    url = 'http://64.push2.eastmoney.com/api/qt/clist/get?cb=jQuery112408828364991523157_1601811976313&pn=' + str(
        j) + '&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=m:0+t:6,m:0+t:13,m:0+t:80,m:1+t:2,m:1+t:23&fields=f2,f3,f4,f5,f6,f7,f8,f9,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22&_=1601811976531'
    html = get_html(url)
    pat = re.compile("[{.*?}]")  # 正则表达式
    data2 = pat.findall(html)
    datas = eval(data2[0])
    i=0
    for data in datas:
        i+=1
        if str(data["f12"]).endswith("114"):
            print("{:<10}{:<6}{:<8}{:<9}{:<8}".format(data["f12"],data["f14"],data["f15"],data["f16"],data["f17"]),chr(12288))
            break


(2) Reflections

Just add one filter condition on top of the Task 2 code.
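One caveat: the break above stops at the first matching code on each page. If every code ending in the chosen suffix is wanted, a small filter over each page's records works instead; a minimal sketch (find_matching is a hypothetical helper, not part of the original script):

def find_matching(datas, suffix):
    # Return every record whose stock code (field f12) ends with suffix.
    return [d for d in datas if str(d["f12"]).endswith(suffix)]

# Usage with the datas parsed on each page above:
# for d in find_matching(datas, "114"):
#     print(d["f12"], d["f14"], d["f15"], d["f16"], d["f17"])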
