每日日报2021.6.2

今天完成内容:

学习web

 学习 Python 爬虫:爬取航班信息并存储到数据库

#coding:utf-8
import requests
from lxml import etree
import pymysql
import random
def create():
    """Drop and recreate the ``LINE`` table in the local ``jichang`` database.

    Any existing ``LINE`` table is dropped first, so previously stored rows
    are lost.  Every column except the auto-increment ``ID`` is ``CHAR(255)``:
    dp/ap (departure place / destination), ndate (departure date),
    dtime/atime (departure / arrival time), dname/aname (departure / arrival
    airport), flightname (flight info), flightspace (cabin), rate (discount
    rate) and price.

    Raises:
        pymysql.MySQLError: if connecting or executing either statement fails.
    """
    # NOTE(review): connection settings (including the password) are
    # hard-coded here and duplicated in insert().
    db = pymysql.connect(host="localhost", user="root", password="123456", db="jichang")
    try:
        with db.cursor() as cursor:
            cursor.execute("DROP TABLE IF EXISTS LINE")

            # The '#' remarks inside the statement are part of the SQL text
            # sent to the server (MySQL end-of-line comments), not Python
            # comments, so the literal is left untouched.
            sql = """CREATE TABLE LINE (
ID INT PRIMARY KEY AUTO_INCREMENT,
dp CHAR(255), # 出发地
ap CHAR(255), # 目的地
ndate CHAR(255), #出发日期
dtime CHAR(255), # 出发时间
atime CHAR(255), # 到达时间
dname CHAR(255), # 出发机场
aname CHAR(255), # 到达机场
flightname CHAR(255), # 航班信息
flightspace CHAR(255), # 舱位
rate CHAR(255), # 折扣率
price CHAR(255) # 价格
)"""

            cursor.execute(sql)
    finally:
        # Close the connection even when a statement fails.
        db.close()

def insert(value):
    """Insert one row into the ``LINE`` table.

    Args:
        value: Sequence of 11 values in column order: dp, ap, ndate, dtime,
            atime, dname, aname, flightname, flightspace, rate, price.

    A success or failure message is printed.  A failing INSERT is rolled
    back and reported on stdout (now including the error) instead of being
    raised, so one bad row does not stop the caller.
    """
    db = pymysql.connect(host="localhost", user="root", password="123456", db="jichang")
    sql = "INSERT INTO LINE(dp,ap,ndate,dtime,atime,dname,aname,flightname,flightspace,rate,price) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    try:
        with db.cursor() as cursor:
            # Parameterized execution: the driver escapes each value.
            cursor.execute(sql, value)
        db.commit()
        print('插入数据成功')
    except Exception as exc:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate while keeping the best-effort behaviour for real errors.
        db.rollback()
        print("插入数据失败", exc)
    finally:
        # Always release the connection, even if rollback itself fails.
        db.close()




def chaxun(start, end, date, timeout=10):
    """Download the flight-search result page for one route and date.

    Args:
        start: Departure city as it appears in the site URL (the script
            passes pinyin such as ``'shanghai'``).
        end: Destination city in the same form.
        date: Departure date string sent as the ``godate`` query parameter
            (the script passes ``'YYYY-MM-DD'``).
        timeout: Seconds to wait for the server before giving up; pass
            ``None`` to wait indefinitely as the original code did.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        requests.exceptions.RequestException: on connection failure or
            timeout.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    # NOTE(review): 'arrCityPy' is given the start city and 'depCityPy' the
    # end city, which looks reversed if arr/dep stand for arrival/departure.
    # Left exactly as in the original; confirm against the site.
    cookies = {
        'arrCityPy': start,
        'depCityPy': end,
    }

    headers = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        # Same three characters as before (^ \ ^); the backslash is now
        # escaped so the literal no longer uses the invalid escape "\^".
        'sec-ch-ua': '^\\^',
        'sec-ch-ua-mobile': '?0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Sec-Fetch-Dest': 'document',
        'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    }

    params = (
        ('unionId', '427'),
        ('godate', date),
        ('searchType', '0'),
    )

    # Without a timeout requests can block forever on an unresponsive server.
    response = requests.get(
        f'https://jipiao.114piaowu.com/{start}-{end}.html',
        headers=headers,
        params=params,
        cookies=cookies,
        timeout=timeout,
    )
    return response.content.decode('utf-8')

def jianxi(html,start,end,time):
    """Extract every flight from a result page and store each cabin offer.

    Args:
        html: Page markup to parse (the script passes the text returned by
            ``chaxun``).
        start: Departure city, copied unchanged into each inserted row.
        end: Destination city, copied unchanged into each inserted row.
        time: Departure date, copied into each row and into each result dict.

    Returns:
        A list with one dict per flight, keyed by 起飞时间, 到达时间, 出发日期,
        出发地点, 抵达地点, 航班 and 机票; 机票 is a list of dicts keyed by
        舱位, 价格 and 折扣 (one per cabin offer).

    Raises:
        IndexError: if an expected text node is missing from a flight entry.

    Side effect: ``insert`` is called once for every cabin offer found.
    """
    tree = etree.HTML(html)
    flights = []

    def compact(text):
        # Remove all whitespace, including whitespace inside the text.
        return ''.join(text.split())

    def first(path):
        # Whitespace-free text of the first node matched by *path*.
        return compact(tree.xpath(path)[0])

    def first_two(path):
        # Whitespace-free concatenation of the first two matched text nodes.
        nodes = tree.xpath(path)
        return ''.join([compact(nodes[0]), compact(nodes[1])])

    flight_count = len(tree.xpath('//*[@class="jp_list"]//*[@class="mainDiv66"]'))
    for flight_no in range(1, flight_count + 1):
        # XPath positions are 1-based: each flight is addressed as the
        # flight_no-th <div> child of the "jp_list" container.
        info = f'//*[@class="jp_list"]/div[{flight_no}]//*[@class="aboutfj"]'
        offers = f'//*[@class="jp_list"]/div[{flight_no}]//*[@class="yd_list"]'

        takeoff = first(f'{info}/li[1]/b/text()')        # departure time
        landing = first(f'{info}/li[1]/p/text()')        # arrival time
        origin = first(f'{info}/li[2]/p[1]/text()')      # departure location
        destination = first(f'{info}/li[2]/p[2]/text()') # arrival location
        flight = first_two(f'{info}/li[3]/p[1]//text()') # flight
        # The airport-construction-fee + fuel-surcharge text (li[4]/p[2]) is
        # not extracted.

        tickets = []
        offer_count = len(tree.xpath(offers))
        for offer_no in range(1, offer_count + 1):
            offer = f'{offers}[{offer_no}]'
            cabin = first(f'{offer}/li[2]/b/text()')       # cabin class
            fare = first_two(f'{offer}/li[3]/b//text()')   # price
            discount = first(f'{offer}/li[3]/em/text()')   # discount
            tickets.append({'舱位':cabin,'价格':fare,'折扣':discount})
            # One database row per cabin offer, in LINE column order.
            insert([start, end, time, takeoff, landing, origin,
                    destination, flight, cabin, discount, fare])

        flights.append({
            '起飞时间': takeoff,
            '到达时间': landing,
            '出发日期': time,
            '出发地点': origin,
            '抵达地点': destination,
            '航班': flight,
            '机票': tickets,
        })

    return flights

if __name__ == '__main__':
    # Demo run: rebuild the LINE table, then fetch, store and print the
    # flights for one fixed route and date.
    #
    # Two earlier drafts (an interactive prompt for cities/date and a loop
    # over all city pairs) used to sit here as inert string literals.  They
    # referenced names not defined in the visible script (time, re, fcitys,
    # tcitys) and called jianxi() with the wrong number of arguments, so
    # they have been removed rather than kept as dead code.
    create()
    date = '2021-06-17'
    start1 = 'shanghai'
    end1 = 'beijing'
    html = chaxun(start1, end1, date)
    datas = jianxi(html, start1, end1, date)
    print(len(datas))
    if datas:
        for data in datas:
            print(data)
    else:
        print('未查询到相关数据')



看视频

遇到问题:

明日目标:

学习web端的开发

学习css

原文地址:https://www.cnblogs.com/leiyu1905/p/14913731.html