下面的代码循环请求同一个页面 10 次,总耗时大约 10 秒。如果用普通的同步循环请求 10 次页面而不使用异步,耗时大约在 30 秒以上。当然,由于受网速影响,这些数据可能有误差,但大体的效果应该是不变的。
# Fetch one listing page several times concurrently with aiohttp and print
# the text of every element matching the ``.tit`` CSS selector.
import asyncio
import time

import aiohttp
from bs4 import BeautifulSoup


async def _fetch_text(session, url, headers, max_attempts):
    """Return the GBK-decoded body of *url*, or ``None`` if it cannot be fetched.

    A non-200 status is reported and retried, but only up to *max_attempts*
    requests in total, so a permanently failing URL cannot retry forever.
    """
    timeout = aiohttp.ClientTimeout(total=10)
    for _ in range(max_attempts):
        try:
            async with session.get(url, headers=headers, timeout=timeout) as response:
                if response.status == 200:
                    # aiohttp ignores a ``response.encoding`` attribute; the
                    # codec has to be handed to ``text()`` explicitly.
                    return await response.text(encoding='gbk')
                print('请求失败:{}'.format(response.status))
        except (aiohttp.ClientError, asyncio.TimeoutError, UnicodeError) as e:
            # Best effort: report the failure and give up on this request.
            print(e)
            return None
    return None


async def html_list(url, headers, session=None, max_attempts=3):
    """Download *url* and return its HTML text decoded as GBK.

    Args:
        url: Address of the page to request.
        headers: Mapping of HTTP request headers.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted, a
            temporary session is created and closed for this single call.
        max_attempts: Maximum number of requests made while the server keeps
            answering with a non-200 status.

    Returns:
        The page text, or ``None`` when the request raised an error or every
        attempt returned a non-200 status.
    """
    if session is not None:
        return await _fetch_text(session, url, headers, max_attempts)
    async with aiohttp.ClientSession() as own_session:
        return await _fetch_text(own_session, url, headers, max_attempts)


async def html_detail(html):
    """Print the text of every ``.tit`` element found in *html*.

    Does nothing when *html* is empty or ``None`` (i.e. the download failed).
    """
    if not html:
        return
    soup = BeautifulSoup(html, 'lxml')
    for node in soup.select('.tit'):
        print(node.get_text())


async def _fetch_and_print(url, headers, session):
    """Download one copy of the page and print its titles."""
    html = await html_list(url, headers, session=session)
    await html_detail(html)


async def _crawl(url, headers, count):
    """Run *count* downloads concurrently over one shared session."""
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(
            *(_fetch_and_print(url, headers, session) for _ in range(count))
        )


def html_index(url, headers, count=10):
    """Request *url* *count* times concurrently and print the titles found.

    This is a synchronous entry point: it drives the coroutines on a private
    event loop and closes that loop when done, so it must not be called from
    code that is already running inside an event loop.
    """
    # An explicitly created loop avoids the deprecated implicit loop creation
    # of ``asyncio.get_event_loop()`` and guarantees the loop gets closed.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_crawl(url, headers, count))
    finally:
        loop.close()


def main():
    """Crawl the demo page with a desktop browser User-Agent."""
    url = 'http://www.meizitu.com/a/pure.html'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3393.4 Safari/537.36',
    }
    html_index(url, headers)


if __name__ == '__main__':
    # Report the wall-clock time of the whole crawl in seconds.
    started = time.perf_counter()
    main()
    print(time.perf_counter() - started)