day10_多线程把六个网站写到文件里

多线程把六个网站写到文件里——对照版本(串行,未使用多线程)

import requests,time,threading

def write_html(url, name, timeout=10):
    """Download *url* and save the response text to the file *name*.

    Args:
        url: Full URL to fetch, including the scheme (e.g. ``http://...``).
        name: Path of the file to create or overwrite with the page text.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host, which would stall the whole run.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    r = requests.get(url, timeout=timeout)

    # The file is always written as UTF-8, whatever encoding the site used.
    with open(name, 'w', encoding='utf8') as f:
        f.write(r.text)

# Host names of the six sites to download; the scheme is added in the loop.
urls = [
    'www.nnzhp.cn',
    'www.besttest.cn',
    'www.imdsx.cn',
    'sb.nnzhp.cn',
    'bbs.besttest.cn',
    'video.besttest.cn',
]

start_time = time.time()

# Serial version: each page is fetched and written before the next one starts.
for url in urls:
    new_url = f'http://{url}'    # full address of the site
    file_name = f'{url}.html'    # one html file per site
    write_html(new_url, file_name)

end_time = time.time()

# Report the total wall-clock time of the serial run.
elapsed = end_time - start_time
print('程序总共运行了', elapsed)

 

多线程把六个网站写到文件里(并行)

import requests
import time
import threading


def write_html(url, name, timeout=10):
    """Download *url* and save the response text to the file *name*.

    Args:
        url: Full URL to fetch, including the scheme (e.g. ``http://...``).
        name: Path of the file to create or overwrite with the page text.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host, and a thread running this function would
            never finish.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    r = requests.get(url, timeout=timeout)
    # The file is always written as UTF-8, whatever encoding the site used.
    with open(name, 'w', encoding='utf8') as f:
        f.write(r.text)


urls = ['www.nnzhp.cn', 'www.besttest.cn', 'www.imdsx.cn', 'sb.nnzhp.cn', 'bbs.besttest.cn', 'video.besttest.cn']
lis = []  # holds every thread so the main thread can wait on them later
start_time = time.time()
for url in urls:  # one thread per url
    new_url = 'http://' + url
    file_name = url + '.html'
    t = threading.Thread(target=write_html, args=(new_url, file_name))  # create a thread
    lis.append(t)
    t.start()  # start the thread without waiting for it to finish

# All six threads are started first and left running; join() makes the main
# thread wait until each child thread has finished. Waiting matters when the
# main thread has a follow-up step that needs every page to be downloaded
# (for example, sending an email once all pages are fetched).
# The join loop must stay outside the loop that starts the threads: joining
# inside it would wait for each thread before starting the next one.
for obj in lis:
    obj.join()

end_time = time.time()
print('程序总共运行了', end_time - start_time)
原文地址:https://www.cnblogs.com/laosun0204/p/8594555.html