node2:/root/python/20200525#cat t600.py
import requests
import time
def download_one(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and print the size of the response body.

    Args:
        url: Address to download.
        timeout: Seconds to wait on the server before giving up. ``requests``
            applies no timeout unless one is passed, so without this an
            unresponsive server would make the call block indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the server does not answer within ``timeout`` seconds.
    """
    resp = requests.get(url, timeout=timeout)
    print('Read {} from {}'.format(len(resp.content), url))
def download_all(sites):
    """Download every URL in ``sites`` one after another, in iteration order."""
    for url in sites:
        download_one(url)
def main():
    """Download the three test pages sequentially and print the elapsed time.

    The duration is measured with ``time.perf_counter()`` around the single
    ``download_all`` call.
    """
    sites = [
        'http://192.168.137.3:9000/test111/',
        'http://192.168.137.3:9000/test222/',
        'http://192.168.137.3:9000/test333/',
    ]

    started = time.perf_counter()
    download_all(sites)
    elapsed = time.perf_counter() - started

    summary = 'Download {} sites in {} seconds'.format(len(sites), elapsed)
    print(summary)
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
node2:/root/python/20200525#time python3 t600.py
Read 75453 from http://192.168.137.3:9000/test111/
Read 66983 from http://192.168.137.3:9000/test222/
Read 66496 from http://192.168.137.3:9000/test333/
Download 3 sites in 18.260805818950757 seconds
real 0m18.432s
user 0m0.161s
sys 0m0.023s
node2:/root/python/20200525#
接着我们再来看,多线程版本的代码实现:
node2:/root/python/20200525#cat t700.py
import concurrent.futures
import requests
import threading
import time
def download_one(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and print the size of the response body.

    Called from worker threads via ``executor.map``, which passes only
    ``url``; ``timeout`` therefore keeps its default there.

    Args:
        url: Address to download.
        timeout: Seconds to wait on the server before giving up. ``requests``
            applies no timeout unless one is passed, so without this a single
            unresponsive server would keep its worker blocked indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the server does not answer within ``timeout`` seconds.
    """
    resp = requests.get(url, timeout=timeout)
    print('Read {} from {}'.format(len(resp.content), url))
def download_all(sites):
    """Download every URL in ``sites`` concurrently on a pool of 5 threads.

    Args:
        sites: Iterable of URLs; each one is passed to ``download_one``.

    Raises:
        Exception: Whatever ``download_one`` raised for the first failing URL
            (in input order), re-raised here in the calling thread.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # executor.map() submits all tasks right away but only re-raises a
        # worker's exception when its result is pulled from the returned
        # iterator. Draining the iterator keeps failures from being silently
        # discarded.
        list(executor.map(download_one, sites))
def main():
    """Download the three test pages via ``download_all`` and print the elapsed time.

    The duration is measured with ``time.perf_counter()`` around the single
    ``download_all`` call.
    """
    sites = [
        'http://192.168.137.3:9000/test111/',
        'http://192.168.137.3:9000/test222/',
        'http://192.168.137.3:9000/test333/',
    ]

    began = time.perf_counter()
    download_all(sites)
    duration = time.perf_counter() - began

    report = 'Download {} sites in {} seconds'.format(len(sites), duration)
    print(report)
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
node2:/root/python/20200525#time python3 t700.py
Read 75915 from http://192.168.137.3:9000/test111/
Read 66983 from http://192.168.137.3:9000/test222/
Read 66468 from http://192.168.137.3:9000/test333/
Download 3 sites in 7.2133595428895205 seconds
real 0m7.385s
user 0m0.151s
sys 0m0.146s
我们具体来看这段代码,它是多线程版本和单线程版的主要区别所在:
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
executor.map(download_one, sites)
这里我们创建了一个线程池,总共有 5 个线程可以分配使用。executor.map() 与前面所讲的 Python 内置的 map() 函数类似,表示对 sites 中的每一个元素,
并发地调用函数 download_one()。
多进程版本:只需把线程池换成进程池,其余代码保持不变:
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
=>
with concurrent.futures.ProcessPoolExecutor() as executor:
node2:/root/python/20200525#cat t800.py
import concurrent.futures
import requests
import threading
import time
def download_one(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and print the size of the response body.

    Called from worker processes via ``executor.map``, which passes only
    ``url``; ``timeout`` therefore keeps its default there.

    Args:
        url: Address to download.
        timeout: Seconds to wait on the server before giving up. ``requests``
            applies no timeout unless one is passed, so without this a single
            unresponsive server would keep its worker blocked indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the server does not answer within ``timeout`` seconds.
    """
    resp = requests.get(url, timeout=timeout)
    print('Read {} from {}'.format(len(resp.content), url))
def download_all(sites):
    """Download every URL in ``sites`` in parallel on a pool of worker processes.

    The pool is created with no arguments, so its size is whatever
    ``ProcessPoolExecutor`` chooses by default.

    Args:
        sites: Iterable of URLs; each one is passed to ``download_one``.

    Raises:
        Exception: Whatever ``download_one`` raised for the first failing URL
            (in input order), re-raised here in the parent process.
    """
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # executor.map() submits all tasks right away but only re-raises a
        # worker's exception when its result is pulled from the returned
        # iterator. Draining the iterator keeps failures from being silently
        # discarded.
        list(executor.map(download_one, sites))
def main():
    """Download the three test pages via ``download_all`` and print the elapsed time.

    The duration is measured with ``time.perf_counter()`` around the single
    ``download_all`` call.
    """
    sites = [
        'http://192.168.137.3:9000/test111/',
        'http://192.168.137.3:9000/test222/',
        'http://192.168.137.3:9000/test333/',
    ]

    t0 = time.perf_counter()
    download_all(sites)
    seconds = time.perf_counter() - t0

    line = 'Download {} sites in {} seconds'.format(len(sites), seconds)
    print(line)
# Run the benchmark only when this file is executed as a script, not on import.
# NOTE(review): with ProcessPoolExecutor this guard presumably also keeps worker
# processes that re-import the module from re-running main() - confirm for the
# start method in use.
if __name__ == "__main__":
    main()