【Python】学习笔记8-多线程多进程

1、线程之间相互独立，使用多线程缩短执行时间，下面是简单爬虫实例：自动抓取网站内容（threading模块）

import threading,time,requests
def downHtml(url,name):
    """Download ``url`` and save the raw response body to ``<name>.html``.

    url:  address handed to ``requests.get``.
    name: output file name without the ``.html`` suffix; the file is
          created (or overwritten) in the current working directory.
    """
    content = requests.get(url).content
    # The context manager closes the file even if write() raises, so the
    # handle is never leaked.
    with open(name+'.html','wb') as f:
        f.write(content)

# [file stem, URL] pairs: each page is saved as '<file stem>.html'.
urls = [
    ['baidu', 'http://www.baidu.com'],
    ['sogou', 'http://www.sogou.com'],
    ['xinliang', 'http://www.sina.com'],
]

# Sequential baseline (no threads), kept for timing comparison:
# start_time = time.time()
# for url in urls:
#     downHtml(url[1],url[0])
# end_time = time.time()
# print(end_time - start_time)


# Threaded version: one thread per URL.
threads = []  # every started thread, so the main thread can wait on them
start_time = time.time()
for site_name, site_url in urls:
    worker = threading.Thread(target=downHtml, args=(site_url, site_name))
    worker.start()
    threads.append(worker)
# Block until every download thread has finished before stopping the clock.
for worker in threads:
    worker.join()
end_time = time.time()
print(end_time - start_time)

2、setDaemon(True) 把线程设置为守护线程（Python 3.10 起 setDaemon 已弃用，推荐写 daemon=True）：一旦主线程结束，守护线程立刻结束，不管是否执行完

def pz():
    """Thread target: wait two seconds, then print the daemon-thread message."""
    delay_seconds = 2
    time.sleep(delay_seconds)
    print('守护线程打印')
threads = []
for i in range(50):
    # daemon=True marks the child as a daemon thread: it is abandoned as soon
    # as the main thread exits, so only the main-thread message is printed.
    # (Thread.setDaemon() is deprecated since Python 3.10; pass the flag to
    # the constructor instead.)
    t = threading.Thread(target=pz, daemon=True)
    t.start()
    threads.append(t)
# Deliberately no join() here: if the main thread waited for the children,
# marking them as daemons would have no visible effect.
# for t in threads:
#     t.join()
print('主线程打印')

3、多线程执行的函数要想获取结果，不能用return，可以写到list里面

import threading

# Shared list the worker threads write their results into; a thread target's
# return value is discarded, so results have to be handed back this way.
res = []


def lida(x,y):
    """Append ``x + y`` to the module-level ``res`` list."""
    res.append(x+y)


lida_threads = []
for i in range(5):
    t = threading.Thread(target=lida,args= (i,i))
    t.start()
    lida_threads.append(t)
# Wait for every worker before reading ``res``; without the join the main
# thread may print the list before all results have been appended.
for t in lida_threads:
    t.join()
print(res)

4、from threading import Lock 线程锁：多个线程同时修改同一个数据时，有可能导致数据不正确，所以修改前要加锁。注意：Python3 并不会自动帮你加锁——GIL 只保证同一时刻只有一个线程在执行字节码，而 num += 1 这类“读取-修改-写回”的操作不是原子的，仍然需要手动加锁

import threading
from threading import Lock

num = 0
lock = Lock()  # a single lock shared by every worker thread


def run():
    """Increment the module-level ``num`` by one while holding ``lock``."""
    global num
    # ``with`` acquires the lock and releases it on exit, even if the body
    # raises, so the lock can never be left held.
    with lock:
        num += 1


lis = []
for i in range(5):
    t = threading.Thread(target=run)
    t.start()
    lis.append(t)
# Wait for all workers so the final value is complete before printing.
for t in lis:
    t.join()
print('over', num)  # prints: over 5

5、在 CPython 中由于 GIL 的存在，多线程不能利用多核CPU；如果想利用多核CPU的话，就得使用多进程 multiprocessing

import multiprocessing,time,threading
def run2():
    """Thread target: wait two seconds, then print the started-by-thread message."""
    pause_seconds = 2
    time.sleep(pause_seconds)
    print('这个是多线程启动的')
def run():
    """Wait two seconds, then start five threads that each execute ``run2``."""
    time.sleep(2)
    for _ in range(5):
        worker = threading.Thread(target=run2)
        worker.start()
if __name__ == '__main__':
    # Start five processes. Each one executes run(), which in turn starts five
    # threads running run2, so the "started by a thread" message really does
    # come from a thread. Targeting run2 directly left run() as dead code.
    for i in range(5):
        p = multiprocessing.Process(target=run)
        p.start()

6、线程池：存放线程的一个池子，池中的线程可以重复使用，花费时间更少，效率更高（threadpool 是第三方模块，需要先 pip install threadpool）

import threadpool,time
def say(num):
    """Print a greeting followed by ``num``, then wait two seconds."""
    greeting = "Hello "
    print(greeting, num)
    time.sleep(2)
# Arguments 0..100; say() is called once for each of them.
res = [*range(101)]
# Thread pool created with 10 as its size argument.
pool = threadpool.ThreadPool(10)
# Build the work requests from the argument list (used here with a function
# that takes a single argument).
reqs = threadpool.makeRequests(say, res)
# Hand each request to the pool; this is the step that gets them executed.
for request in reqs:
    pool.putRequest(request)
# Block until the pool has finished the submitted work.
pool.wait()

7、自己封装的线程池

import  threadpool
class MyPool(object):
    """Small wrapper that runs ``func`` over ``data`` using ``threadpool.ThreadPool``.

    func: callable handed to ``threadpool.makeRequests``.
    size: value passed to ``threadpool.ThreadPool`` (defaults to 20).
    data: iterable of arguments for ``func``; ``None`` means nothing to do.
    """

    def __init__(self,func,size=20,data=None):
        self.func = func
        self.size = size
        self.data = data

    def pool(self):
        """Submit ``func`` for every item of ``data`` and wait for completion.

        Returns ``None``. When ``data`` is ``None`` (the constructor default)
        the call is a no-op instead of passing ``None`` on to ``makeRequests``.
        """
        if self.data is None:
            return
        pool = threadpool.ThreadPool(self.size)
        # Build the work requests from the data items.
        reqs = threadpool.makeRequests(self.func,self.data)
        # A plain loop: putRequest is called for its side effect only, so
        # there is no point building a throwaway list.
        for req in reqs:
            pool.putRequest(req)
        pool.wait()  # block until the submitted work has finished
def down(num) -> None:
    """Print ``num`` on its own line."""
    value = num
    print(value)
# Run down() over the numbers 1..7 through the wrapper defined above.
my = MyPool(data=list(range(1, 8)), func=down)
my.pool()