4.22

re练习手刃豆瓣TOP250电影信息

import csv
import re

import requests
# Browser-like User-Agent sent with the request instead of the library default.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}

# Listing page requested by this script (query string: start=0, empty filter).
url = "https://movie.douban.com/top250?start=0&filter="

# One match per movie <li>. Named groups, in order: num, name, year, average,
# people. re.S lets ".*?" run across the line breaks inside the HTML.
# The trailing text must be the CJK ideographs "人评价" (U+4EBA ...), which is
# what follows the vote count on the page.
obj = re.compile(
    r'<li>.*?<div class="item">.*?<div class="pic">.*?'
    r'<em class="">(?P<num>\d+)</em>'
    r'.*?<span class="title">(?P<name>.*?)</span>'
    r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp;'
    r'.*?property="v:average">(?P<average>.*?)</span>'
    r'.*?<span>(?P<people>\d+)人评价</span>',
    re.S,
)


def parse_movies(html):
    """Extract one row per movie entry found in *html*.

    Args:
        html: Page markup as a string.

    Returns:
        A list of ``[num, name, year, average, people]`` lists of strings, in
        the order the entries appear in *html*. Empty when nothing matches.
    """
    rows = []
    for item in obj.finditer(html):
        dic = item.groupdict()
        # The year is captured together with the newline/indentation that
        # follows <br> in the markup, so trim it.
        dic["year"] = dic["year"].strip()
        rows.append(list(dic.values()))
    return rows


def main():
    """Download the listing page, echo it, and write the rows to movie.csv.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an error response rather than writing an empty CSV.
    resp.raise_for_status()
    print(resp.text)

    # newline="" is what the csv module expects; without it some platforms
    # emit a blank line after every row.
    with open("movie.csv", mode="w", encoding="utf-8", newline="") as f:
        csvwriter = csv.writer(f)  # f.write() would also work; csv handles quoting
        csvwriter.writerows(parse_movies(resp.text))


if __name__ == "__main__":
    main()

原文地址:https://www.cnblogs.com/zhaoyids/p/14903831.html