天气预报爬虫小程序

Python3.5
Mac ios系统
爬取天气预报的小程序:

import requests, csv, random, time, socket
from bs4 import BeautifulSoup
import http.client

def get_content(url, data = None):
header = {
'Accept': 'text / html, application / xhtml + xml, application / xml;q = 0.9, image / webp, * / *;q = 0.8',
'Accept - Encoding':'gzip, deflate, sdch',
'Accept - Language':'zh - CN, zh;q = 0.8',
'Connection':'keep - alive',
'User - Agent': 'Mozilla / 5.0(Macintosh;Intel Mac OS X 10 11_6) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 50.0.2661.102Safari / 537.36'
}
timeout = random.choice(range(80,180))
while True:
try:
rep = requests.get(url, headers = header, timeout = timeout)
rep.encoding = 'utf-8'
break
except socket.timeout as e:
print('3:', e)
time.sleep(random.choice(range(8,15)))

except socket.error as e:
print('4:', e)
time.sleep(random.choice(range(20,60)))

except http.client.BadStatusLine as e:
print('5:', e)
time.sleep(random.choice(range(30,80)))

except http.client.ImproperConnectionState as e:
print('6:', e)
time.sleep(random.choice(range(5,15)))

return rep.text

def get_data(html_text):
finalFile = []
bs = BeautifulSoup(html_text, 'html.parser')
body = bs.body
data = body.find('div', id="15d")
ul = data.find('ul')
li = ul.find_all('li')

for day in li:
temp = []
inf = day.find_all('span')

date = inf[0].string
temp.append(date)

weather = inf[1].string
temp.append(weather)

temperature= inf[2].text
temp.append(temperature)

wind = inf[3].string
temp.append(wind)

wind1 = inf[4].string
temp.append(wind1)

finalFile.append(temp)

return finalFile

def write_data(data, name):
file_name = name
with open(file_name, 'a', errors = 'ignore', newline = '') as f:
f_csv = csv.writer(f)
f_csv.writerows(data)

if __name__ == '__main__':
url = 'http://www.weather.com.cn/weather15d/101270101.shtml'
html = get_content(url)
result = get_data(html)
write_data(result, 'content.csv')
原文地址:https://www.cnblogs.com/fredkeke/p/5767216.html