多线程开发+多线程使用共享数据-17

进程：运行着的程序

线程：每个进程里面至少包含一个线程，线程是操作系统创建的，用来控制代码执行的数据结构，线程就像代码的执行许可证

单线程程序，主线程的入口就是代码的开头

主线程顺序往下执行，直到所有的代码都执行完

CPU核心，在一个时间点上只能执行一个线程代码

调度：操作系统不会让一个线程一直占用CPU的

进程里的多线程：

线程库：代码通过系统调用，请求OS分配一个新的线程

python里面：thread、threading都可以用来创建和管理线程，thread比较底层（在Python 3中已改名为_thread），threading是thread模块的扩展，提供了很多线程同步功能，使用起来更加方便强大

多线程的概念

# coding=utf-8

# Executed by the main thread before any child thread has been created.
print('main thread start.')

import threading

from time import sleep

def thread1_entry():
    """Entry function of the child thread.

    Prints a start message, sleeps for 15 seconds, then prints an end
    message. Takes no arguments and returns nothing.
    """
    print('child thread 1,start')

    sleep(15)

    print('child thread 1,end')

# Instantiate the Thread object; this alone does not create a new thread.
t1 = threading.Thread(target=thread1_entry)

# start() is what creates the new thread: through a system call the OS is
# asked for a new thread that runs thread1_entry in parallel with the main
# thread. Only from this point on are there two threads.
t1.start()

# The main thread sleeps for 10 seconds while the child thread runs.
sleep(10)

print('main thread end.')

为什么需要多线程？

多线程给一个程序并行执行代码的能力

同时处理多个任务

$convert

>>convert 1.avi

>>convert 2.avi

常见的：UI线程、任务线程 task execute

例子：主线程等待子线程结束

threading.Thread 只是创建线程对象

调用 start 才是真正创建并启动新的线程

# coding=utf8

import threading

from time import sleep, ctime  # ctime() returns the current time as a string
def thread1_entry(nsec):
    """Entry function of child thread 1.

    Prints a start line with the current time, sleeps, then prints an end
    line with the current time.

    Args:
        nsec: Number of seconds to sleep; passed unchanged to ``sleep``.
    """
    print('child thread 1, start at:', ctime())

    sleep(nsec)

    print('child thread 1, end at:', ctime())
def thread2_entry(nsec):
    """Entry function of child thread 2.

    Prints a start line with the current time, sleeps, then prints an end
    line with the current time.

    Args:
        nsec: Number of seconds to sleep; passed unchanged to ``sleep``.
    """
    print('child thread 2, start at:', ctime())

    sleep(nsec)

    print('child thread 2, end at:', ctime())
if __name__=='__main__':
    print('main thread start.')

    # Create the thread objects and name the entry function of each new
    # thread. ``args`` must be a tuple, hence the trailing comma in (1,).
    t1 = threading.Thread(target=thread1_entry, args=(1,))
    t2 = threading.Thread(target=thread2_entry, args=(2,))

    # Start the new threads.
    t1.start()
    t2.start()

    # join() makes the calling (main) thread wait until t1 has finished
    # before it continues.
    t1.join()

    # Wait for t2 to finish as well.
    t2.join()

    print('main thread end.')

局部变量：多个线程执行一个函数，每个线程都有自己的局部变量，不会有冲突

全局变量：线程可以共享的资源，控制不产生冲突---共享数据的访问

多线程使用共享数据

# coding=utf-8 
import threading 
from time import sleep
# Shared data: maps an account name to its balance. It is read and written
# by more than one thread, so access is guarded by zhifubao_lock.
zhifubao = {
    'zhy': 2000,
    'liming': 5000,
    'wangan': 15000,
    'zhaolei': 6005000,
}

# threading.Lock() returns a new lock object.
zhifubao_lock = threading.Lock()
def thread1_didi_pay(account,amount):
    """Deduct ``amount`` from ``zhifubao[account]`` while holding the lock.

    The balance is read, the thread sleeps 2 seconds, and the reduced
    balance is written back. The whole read-modify-write sequence runs
    under ``zhifubao_lock`` so that another thread using the same lock
    cannot change the balance in between.

    Args:
        account: Key of the account in ``zhifubao``.
        amount: Value subtracted from the current balance.

    Raises:
        KeyError: If ``account`` is not a key of ``zhifubao``.
    """
    # Acquire the lock before touching the shared object. When several
    # threads try to acquire it at the same time, only one succeeds and
    # continues; the others wait until they get the lock.
    #
    # After the access the lock must be released (Lock.release), otherwise
    # the threads waiting for it would wait forever. The ``with`` statement
    # releases it on exit, even when the body raises.
    with zhifubao_lock:
        print('* t1: get balance from bank')
        balance = zhifubao[account]

        print('* t1: do something(like discount lookup) for 2 seconds')
        sleep(2)

        print('* t1: deduct')
        zhifubao[account] = balance - amount
def thread2_yuebao_interest(account,amount):
    """Add ``amount`` to ``zhifubao[account]`` while holding the lock.

    The balance is read, the thread sleeps 1 second, and the increased
    balance is written back, all under ``zhifubao_lock``.

    Args:
        account: Key of the account in ``zhifubao``.
        amount: Value added to the current balance.

    Raises:
        KeyError: If ``account`` is not a key of ``zhifubao``.
    """
    # Hold the lock for the whole read-modify-write sequence; ``with``
    # releases it on exit, even when the body raises.
    with zhifubao_lock:
        print('$ t2: get balance from bank')
        balance = zhifubao[account]

        print('$ t2: do something2... for 1 seconds')
        sleep(1)

        print('$ t2: add')
        zhifubao[account] = balance + amount
# One thread deducts 10 from account 'zhy', the other adds 10 to it.
t1 = threading.Thread(target=thread1_didi_pay, args=('zhy', 10))
t2 = threading.Thread(target=thread2_yuebao_interest, args=('zhy', 10))

# Start both threads, then wait for both to finish before reading the result.
for worker in (t1, t2):
    worker.start()
for worker in (t1, t2):
    worker.join()

print('finally, zhy balance is %s' % zhifubao['zhy'])
条件变量 
生产者、消费者
　　一个线程负责让用户输入命令，存入一个List中
　　另一个线程负责从List中取出命令，执行命令
　　用户输入命令的速度和执行命令的速度，谁快谁慢很难说
负责让用户输入命令的线程：生产者，产生命令存入列表中
负责执行命令的线程：消费者，取出列表中的命令
有先后顺序

# coding=utf-8 
import threading,time 
from random import randint
# Shared list of commands: the producer appends to it, the consumer removes
# items from it.
commandList = list()
# Lock object used to guard access to commandList.
cv = threading.Lock()
# Producer thread
def thread_producer():
    """Produce commands forever and append them to ``commandList``.

    Each resource is simply the string ``command_<n>`` with n = 1, 2, 3, ...
    Never returns.
    """
    cmdNo = 1
    while True:
        # The produced resource is represented by a plain string.
        resource = f'command_{cmdNo}'

        # Random wait that stands for the time needed to produce the
        # resource, i.e. the time the user takes to type a command.
        time.sleep(randint(1,3))

        # Once produced, take the lock, store the resource in the shared
        # commandList, and release the lock when the ``with`` block exits.
        with cv:
            commandList.append(resource)
            print('produce resource %s' % resource)

        cmdNo += 1
# Consumer thread
def thread_consumer ():
    """Take commands out of ``commandList`` forever and "execute" them.

    Never returns. When the list is empty the loop does not block, it just
    goes around again immediately.
    """
    while True:
        resource = None

        # Take the lock, then remove the oldest command stored by the
        # producer, if there is one.
        with cv:
            if commandList:
                resource = commandList.pop(0)
        # The lock is released here, so the producer can access the shared
        # list again while this thread consumes the resource.

        if resource is not None:
            # Random wait that stands for the time needed to consume it.
            time.sleep(randint(1, 3))
            print('consume resource %s' % resource)

        # NOTE: when commandList holds no command, this loop keeps spinning
        # without doing any work, which burns a lot of CPU.
if __name__=='__main__':
    # One producer thread and one consumer thread.
    t1 = threading.Thread(target=thread_producer)
    t2 = threading.Thread(target=thread_consumer)
    t1.start()
    t2.start()
    # Both entry functions loop forever, so these joins keep the main
    # thread waiting for as long as the program runs.
    t1.join()
    t2.join()


CPU占用率会很高，原因：命令列表为空时，消费者线程会不停地执行空循环来等待；可以在消费者的循环里加入sleep来缓解，但这并不是一个好的方法
可以通过条件变量
条件变量
线程A（消费者）通过条件变量对象等待一个条件满足，否则就睡眠式等待
线程B（生产者）在条件满足时，通过条件变量通知唤醒线程A
线程A（消费者）接到通知，从睡眠中醒来，继续代码的执行
# coding=utf-8 
import threading,time 
from random import randint
# Shared list of commands exchanged between producer and consumer.
commandList = list()
# threading.Condition() returns a condition object; it contains a lock, so
# acquire/release can be called on it directly.
cv = threading.Condition()
 # 消费者线程 
def thread_consumer ():
    """Take commands out of ``commandList`` forever, sleeping while it is empty.

    Never returns. Instead of spinning, the thread blocks in ``cv.wait()``
    until the producer calls ``cv.notify()``.
    """
    while True:
        # The condition object contains a lock; ``with`` acquires it here
        # and releases it when the block exits.
        with cv:
            # While the command list is empty, wait(): this releases the
            # lock and blocks until the producer has stored a resource and
            # called notify(). On wake-up the lock is re-acquired before
            # wait() returns, so the producer cannot touch the shared list
            # while the command is taken out below.
            while not commandList:
                cv.wait()

            # The list is non-empty here; take the oldest resource.
            resource = commandList.pop(0)
        # Lock released: the producer may access the shared list again.

        # Random wait that stands for the time needed to consume it.
        time.sleep(randint(1, 3))
        print('consume resource %s' % resource)
# Producer thread
def thread_producer():
    """Produce commands forever and wake a waiting consumer for each one.

    Each resource is simply the string ``command_<n>`` with n = 1, 2, 3, ...
    Never returns.
    """
    cmdNo = 1
    while True:
        # The produced resource is represented by a plain string.
        resource = f'command_{cmdNo}'

        # Random wait that stands for the time needed to produce it.
        time.sleep(randint(1,3))

        # Acquire the lock inside the condition object; it is released
        # again when the ``with`` block exits.
        with cv:
            commandList.append(resource)
            print('produce resource %s' % resource)

            # notify() announces that there is work to do: it wakes up one
            # consumer thread blocked waiting on this condition variable.
            cv.notify()

        cmdNo += 1
if __name__=='__main__':
    # One producer thread and one consumer thread.
    t1 = threading.Thread(target=thread_producer)
    t2 = threading.Thread(target=thread_consumer)
    t1.start()
    t2.start()
    # Both entry functions loop forever, so these joins keep the main
    # thread waiting for as long as the program runs.
    t1.join()
    t2.join()
其他常用线程同步技术
RLock-可重入锁
Semaphore-信号量

12

先阅读下面关于Python requests 库的文章 ，了解 使用它去获取一个网页内容的方法。

http://docs.python-requests.org/zh_CN/latest/user/quickstart.html


然后编写一个python程序，创建两个子线程，分别到下面的网址获取文本内容

http://mirrors.163.com/centos/7.3.1611/isos/x86_64/0_README.txt
http://mirrors.163.com/centos/6.9/isos/x86_64/README.txt

主线程等待这两个子线程获取到信息后，将其内容合并后存入名为 readme89.TXT 的文件中

参考答案，往下翻

# coding=utf8
import requests
import threading


urls = [
    'http://mirrors.163.com/centos/6.9/isos/x86_64/README.txt',
    'http://mirrors.163.com/centos/7.3.1611/isos/x86_64/0_README.txt',
]

# Holds the downloaded text for each entry of urls, at the same index.
# Pre-filled with one None placeholder per URL.
fileContentList = [None] * len(urls)

# Lock object used to control access to fileContentList.
lock = threading.Lock()

def thread_entry(idx,url,timeout=30):
    """Download ``url`` and store its text in ``fileContentList[idx]``.

    Args:
        idx: Index of the slot in ``fileContentList`` to fill; also used in
            the progress messages.
        url: Address fetched with ``requests.get``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled server would block this thread,
            and the main thread joining it, indefinitely.
    """
    print('thread #%s start' % idx)

    # The download is deliberately done outside the lock so that the
    # threads can fetch their pages at the same time.
    r = requests.get(url, timeout=timeout)

    # r.text is the response body decoded to text (see the requests docs).
    # ``with`` releases the lock even if the assignment raises.
    with lock:
        fileContentList[idx] = r.text

    print('thread #%s end' % idx)


if __name__ == '__main__':
    print('main thread start.')

    threadpool = []

    # Start one download thread per URL and remember it for joining.
    for idx,url in enumerate(urls):
        t = threading.Thread(target=thread_entry,
                          args=(idx,url))
        t.start()

        threadpool.append(t)

    # Wait for all threads to finish.
    for t in threadpool:
        t.join()

    # All threads are done, so every text has been stored; merge them with
    # a dashed separator line surrounded by blank lines.
    mergeTxt = '\n\n----------------------\n\n'.join(fileContentList)
    print(mergeTxt)

    with open('readme89.txt','w',encoding='utf8') as f:
        f.write(mergeTxt)

    print('main thread end.')