import sys
from urllib import request, parse
import ssl
# NOTE(review): this replaces the default HTTPS context factory with one that
# skips TLS certificate verification, for every HTTPS request made through
# urllib in this process, and it relies on private ssl APIs. It is kept so the
# script behaves as before; prefer removing it (or passing a dedicated
# `context=` to urlopen) once certificate validation works in the target
# environment.
ssl._create_default_https_context = ssl._create_unverified_context

# Douban chart endpoint; the "start"/"limit" form fields are sent in the POST
# body built below.
url = "https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action"
headers = {
    "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
}
data = {
    "start": "20",
    "limit": "20",
}
# urllib requires POST data to be bytes (or an iterable of bytes), so the form
# is url-encoded and then encoded to bytes.
data = parse.urlencode(data).encode(encoding="UTF8")

# Supplying `data` makes urllib issue this request as a POST.
req = request.Request(url, data=data, headers=headers)

# Use the response as a context manager so the connection is always closed.
with request.urlopen(req) as resp:
    # Decode with the charset declared in the Content-Type header, falling
    # back to UTF-8. The local filesystem encoding says nothing about the
    # response body and garbles non-ASCII text wherever it is not UTF-8.
    charset = resp.headers.get_content_charset() or "utf-8"
    response = resp.read().decode(charset)

# Persist the decoded body as UTF-8 text.
with open("doubantop250.json", "w", encoding="utf-8") as f:
    f.write(response)
print("ok")