搞笑视频网

import requests
import re
from lxml import etree
# Crawl the listing pages of the funny-video site and print, for every video
# found, a running number, its title and its link.

# Running counter used to number the printed videos across all pages.
num = 1
try:
    # URL template of the listing pages; "{}" is replaced by the page number.
    start_url = "http://gaoxiao.52op.net/mm/index_{}.htm"
    # Fetch pages 1-15. range() excludes its stop value, so the stop must be
    # 16 for page 15 to be included.
    for page in range(1, 16):
        url = start_url.format(page)
        # The timeout keeps an unresponsive server from hanging the script
        # forever; a timeout error is reported by the handler below.
        response = requests.get(url, timeout=10).content.decode('utf-8')
        data_url = etree.HTML(response)
        # Each video entry is an <a> inside div.a_itemBox > div.a_imgBox:
        # its href is the video link and the alt text of its <img> is the title.
        get_url = data_url.xpath('//div[@ class="a_itemBox"]/div[@ class="a_imgBox"]/a/@href')
        get_title = data_url.xpath('//div[@ class="a_itemBox"]/div[@ class="a_imgBox"]/a/img/@alt')
        # Pair titles with links positionally; zip() stops at the shorter list.
        for video_title, video_url in zip(get_title, get_url):
            print(num, video_title, video_url)
            num += 1

except Exception as error:
    # Top-level boundary of the script: report the failure (network error,
    # decode error, parse error) as a message and stop crawling.
    print(error)
原文地址:https://www.cnblogs.com/LQ970811/p/10667005.html