爬取豆瓣Top250_Ajax动态页面

爬取网址: https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action

完整代码:

import sys
from urllib import request, parse
import ssl

# Script: POST a paging form to Douban's chart Ajax endpoint and save the
# decoded response body to "doubantop250.json".

# NOTE(security): this replaces the default HTTPS context factory for the whole
# process with one that skips certificate verification. It is kept so the
# script behaves as before on machines without a usable CA bundle, but it
# exposes the request to man-in-the-middle attacks -- do not reuse elsewhere.
ssl._create_default_https_context = ssl._create_unverified_context

url = "https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action"

# A browser-like User-Agent is sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
}

# Form fields sent in the request body.
# NOTE(review): presumably "start" is the result offset and "limit" the page
# size -- confirm against the endpoint's behavior.
data = {
    "start": "20",
    "limit": "20",
}

# urllib requires the request body to be bytes (or an iterable of bytes), so
# the form is URL-encoded and then encoded to UTF-8.
data = parse.urlencode(data).encode("utf-8")

# Supplying `data` makes urllib issue a POST request.
req = request.Request(url, data=data, headers=headers)

# The context manager closes the connection even if reading or decoding fails;
# the timeout keeps a stalled connection from hanging the script forever.
with request.urlopen(req, timeout=30) as resp:
    # Decode with the charset the server declares in Content-Type. The local
    # filesystem encoding is unrelated to the response body and can garble
    # non-ASCII text, so it is not used here. Fall back to UTF-8 when the
    # server declares no charset.
    charset = resp.headers.get_content_charset() or "utf-8"
    response = resp.read().decode(charset)

with open("doubantop250.json", "w", encoding="utf-8") as f:
    f.write(response)
print("ok")
View Code

参考文章:

原文地址:https://www.cnblogs.com/amou/p/9134505.html