爬虫-- 初级

普通同步代码 耗时

import requests
from functools import wraps
import time

def time_count(func):
    """Decorate ``func`` so that every call prints how long it took.

    The wrapper forwards all positional and keyword arguments to ``func``,
    prints the elapsed wall-clock time (measured with ``time.time``) in
    seconds with two decimals, and returns whatever ``func`` returned.
    """
    @wraps(func)
    def timed(*args, **kw):
        began = time.time()
        outcome = func(*args, **kw)
        elapsed = time.time() - began
        print('func {} cost {:.2f} s'.format(func.__name__, elapsed))
        return outcome
    return timed


@time_count
def normal():
    """Fetch the global ``URL`` twice, one request after the other.

    Each request blocks until its response arrives; the response's final
    URL is printed after every request.
    """
    for _ in range(2):
        response = requests.get(URL)
        print(response.url)
        
# Script entry point: define the target URL, then run the timed synchronous demo.
if __name__ == '__main__':
    # normal() reads URL as a global; it is only defined when run as a script.
    URL = 'https://morvanzhou.github.io'
    normal()

异步 IO(asyncio、aiohttp 等):对于 IO 密集型任务,使用异步 IO 来处理;对于计算密集型(即依赖 CPU 的)任务,则采用多进程。

import asyncio
import aiohttp
from functools import wraps
import time

def time_count(func):
    """Decorate ``func`` so every call prints its duration, tagged with its first argument.

    Works for both plain functions and ``async def`` coroutine functions.
    For a coroutine function the wrapper is itself a coroutine function and
    the timer stops only after the coroutine has been awaited to completion;
    a plain synchronous wrapper would merely time the creation of the
    coroutine object and always report roughly 0.00 s.

    Args:
        func: The function or coroutine function to time.

    Returns:
        A wrapper with the same call signature that returns ``func``'s result.
        The printed line is ``func <name>-<first positional arg> cost <secs> s``,
        or ``func <name> cost <secs> s`` when there are no positional arguments.
    """
    # Local import: only needed here to detect coroutine functions.
    import inspect

    def report(args, start):
        elapsed = time.time() - start
        if args:
            print('func {}-{} cost {:.2f} s'.format(func.__name__, args[0], elapsed))
        else:
            # No positional argument to use as a tag; avoid IndexError on args[0].
            print('func {} cost {:.2f} s'.format(func.__name__, elapsed))

    if inspect.iscoroutinefunction(func):
        @wraps(func)
        async def inner_func(*args, **kw):
            start = time.time()
            # Await inside the timed region so the real run time is measured.
            result = await func(*args, **kw)
            report(args, start)
            return result
    else:
        @wraps(func)
        def inner_func(*args, **kw):
            start = time.time()
            result = func(*args, **kw)
            report(args, start)
            return result
    return inner_func

@time_count
async def job(session):
    """Request the global ``URL`` through ``session`` and return the final URL.

    Args:
        session: The client session used to issue the GET request
            (an ``aiohttp.ClientSession`` in this script).

    Returns:
        The response's URL converted to ``str``.
    """
    # ``async with`` releases the response (and its connection) when the block
    # exits, instead of leaving it open until garbage collection.
    async with session.get(URL) as response:
        return str(response.url)

# @time_count()
async def main(loop):
    """Run two ``job`` requests concurrently on ``loop`` and print their results.

    A single ``aiohttp.ClientSession`` is shared by both tasks. The results
    are collected from the set of finished tasks returned by ``asyncio.wait``
    and printed as one list.
    """
    async with aiohttp.ClientSession() as session:
        pending = []
        for _ in range(2):
            pending.append(loop.create_task(job(session)))
        done, _not_done = await asyncio.wait(pending)
        print([task.result() for task in done])

# Script entry point: run the concurrent aiohttp demo on a dedicated event loop.
if __name__ == '__main__':
    URL = 'https://morvanzhou.github.io'
    # asyncio.get_event_loop() is deprecated when no loop is running and
    # raises RuntimeError on recent Python versions if no loop has been set,
    # so create the loop explicitly. main() still receives it as an argument.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(main(loop))
    finally:
        # Always release the loop's resources, even if main() raised.
        loop.close()

另一个异步示例

import asyncio
from functools import wraps
import time

def time_count(func):
    """Decorate ``func`` so every call prints its duration, tagged with its first argument.

    Supports both regular functions and ``async def`` coroutine functions.
    When ``func`` is a coroutine function, the returned wrapper is a
    coroutine function too and measures the time until the awaited call
    finishes. Timing a coroutine function from a synchronous wrapper would
    only measure building the coroutine object (about 0.00 s).

    Args:
        func: The function or coroutine function to time.

    Returns:
        A wrapper with the same call signature that returns ``func``'s result.
        It prints ``func <name>-<first positional arg> cost <secs> s``, or
        ``func <name> cost <secs> s`` if called without positional arguments.
    """
    # Imported locally: used only to tell coroutine functions from plain ones.
    import inspect

    def _announce(call_args, started):
        took = time.time() - started
        if call_args:
            print('func {}-{} cost {:.2f} s'.format(func.__name__, call_args[0], took))
        else:
            # Nothing to tag the line with; do not index into an empty tuple.
            print('func {} cost {:.2f} s'.format(func.__name__, took))

    if inspect.iscoroutinefunction(func):
        @wraps(func)
        async def inner_func(*args, **kw):
            started = time.time()
            # The await must happen inside the timed region.
            result = await func(*args, **kw)
            _announce(args, started)
            return result
    else:
        @wraps(func)
        def inner_func(*args, **kw):
            started = time.time()
            result = func(*args, **kw)
            _announce(args, started)
            return result
    return inner_func

@time_count
async def job(t):
    """Simulate an I/O-bound task by suspending for ``t`` seconds.

    Uses ``asyncio.sleep`` so the wait is awaited rather than blocking.
    """
    await asyncio.sleep(t)

@time_count
async def main(loop):
    """Schedule ``job(0)``, ``job(1)`` and ``job(2)`` on ``loop`` and wait for all of them."""
    pending = []
    for delay in range(3):
        pending.append(loop.create_task(job(delay)))
    await asyncio.wait(pending)
    
# Script entry point: run the asyncio sleep demo on a dedicated event loop.
if __name__ == '__main__':
    # asyncio.get_event_loop() is deprecated when no loop is running and
    # raises RuntimeError on recent Python versions if no loop has been set,
    # so create the loop explicitly. main() still receives it as an argument.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(main(loop))
    finally:
        # Always release the loop's resources, even if main() raised.
        loop.close()

同步举例

from functools import wraps
import time

def time_count(func):
    """Return a wrapper around ``func`` that reports each call's duration.

    The wrapper passes its arguments straight through to ``func``, prints
    ``func <name> cost <seconds> s`` (seconds from ``time.time``, two
    decimals) once the call has returned, and hands back ``func``'s result.
    """
    @wraps(func)
    def wrapper(*args, **kw):
        t0 = time.time()
        value = func(*args, **kw)
        t1 = time.time()
        message = 'func {} cost {:.2f} s'.format(func.__name__, t1 - t0)
        print(message)
        return value
    return wrapper

@time_count
def job(t):
    """Simulate a blocking task by sleeping for ``t`` seconds with ``time.sleep``."""
    time.sleep(t)

@time_count
def main():
    """Run ``job`` one after another for 0, 1 and 2."""
    for seconds in range(3):
        job(seconds)
    

# Script entry point: run the timed sequential (blocking) demo.
if __name__ == '__main__':
    main()
    
    
原文地址:https://www.cnblogs.com/Frank99/p/10397334.html