Crawler examples: fetching pages, simulating user login, and downloading videos (kept simple)

# Using the requests module, which is built on top of urllib
# urllib is Python's built-in module; it likewise simulates sending HTTP requests
# Both can simulate HTTP requests: GET, POST, PUT, DELETE, ...

import requests

# res = requests.get('https://www.baidu.com')
# res.encoding="utf-8"
# print(res.text)
# fetches the full text content of the page
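# Besides get, requests has a matching helper for each of the other HTTP verbs
# mentioned above. A minimal commented sketch, using httpbin.org only as an
# assumed throwaway echo endpoint (any test service would do):
# r = requests.post('https://httpbin.org/post', data={"k": "v"})
# r = requests.put('https://httpbin.org/put', data={"k": "v"})
# r = requests.delete('https://httpbin.org/delete')
# print(r.status_code, r.json())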


# params holds the query-string conditions for the data you want to fetch
# headers carries the request headers the site validates before it returns the real data; if you don't know which headers a site checks, just send them all (User-Agent is usually the essential one)
# the query string appended to the URL follows the target site's own format; for Baidu, see Baidu's own pages
# before filling in params, look at how the target site builds its search URL: wd is Baidu's keyword parameter, and its value is whatever you want to search for (run a Baidu search and inspect the URL; a sketch follows the example below)
# res = requests.get('https://www.keke234.com/',
#                    params={"md": "egon老师",
#                            "pn": 1},
#                    headers={
#                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
#                    })
#
# res.encoding = "utf-8"
# print(res.text)
# with open("a.html", "w") as f:
#     f.write(res.text)
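# For reference, a sketch of the Baidu search described in the comments above,
# where wd is the keyword parameter and pn the result offset (assuming Baidu's
# standard https://www.baidu.com/s search URL):
# res = requests.get('https://www.baidu.com/s',
#                    params={"wd": "egon", "pn": 1},
#                    headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"})
# print(res.status_code)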
# Simulated login
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
#     'Referer': 'http://www.aa7a.cn/user.php?&ref=http%3A%2F%2Fwww.aa7a.cn%2F',
# }
# res = requests.post("http://www.aa7a.cn/user.php",
#                     headers=headers,
#                     data={
#                         "username": "812548387@qq.com",
#                         "password": "qweqwe",
#                         'captcha': 'xp7z',
#                         'remember': 1,
#                         'ref': 'http://www.aa7a.cn/',
#                         'act': 'act_login'
#                     })
# cookie = res.cookies.get_dict()
# res2 = requests.get('http://www.aa7a.cn/',
#                     headers=headers,
#                     cookies=cookie)
# if '812548387@qq.com' in res2.text:
#     print("login succeeded")
# else:
#     print("not logged in")


import re
res = requests.get('https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=1&start=0')
reg = ' <a href="(.*?)" class="vervideo-lilink actplay">'

# Inspecting the video links shows that each detail-page URL is https://www.pearvideo.com/ plus the path captured by the regex above
obj = re.findall(reg, res.text)
for i in obj:
    url = 'https://www.pearvideo.com/' + i
    res1 = requests.get(url)
    obj1 = re.findall('srcUrl="(.*?)"', res1.text)
    name = obj1[0].rsplit('/', 1)[1]  # the last segment of the video URL is used as the file name

    res2 = requests.get(obj1[0])
    # iter_content yields the response body as binary chunks; loop over it here rather than writing res2.content in one shot (a streaming variant is sketched below)
    with open(name, "wb") as f:
        for chunk in res2.iter_content():
            f.write(chunk)
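
# For larger videos, requests can stream the download so the whole file is never
# held in memory; a sketch of the same write loop with stream=True and an assumed
# 64 KB chunk size:
# res2 = requests.get(obj1[0], stream=True)
# with open(name, "wb") as f:
#     for chunk in res2.iter_content(chunk_size=1024 * 64):
#         if chunk:
#             f.write(chunk)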


Original post: https://www.cnblogs.com/yangxinpython/p/11930273.html