eg: Error
self._target(*self._args, **self._kwargs) TypeError: get_zonghe_answers() takes 1 positional argument but 36 were given
Instead, you should provide args as a tuple:
t = threading.Thread(target=startSuggestworker, args = (start_keyword,))
http://blog.gusibi.com/post/python-thread-note/
eg: Download data
import * #看情况自己导入 all_threads_ret = [] 用于各个进程爬取的数据 def get_zonghe_answers(url): .... data = get_data_from_url(url) all_threads_ret.append(data) def mutiprocess_thread(): import threading df = pd.read_csv('zonghe.csv', header=None) df_data = np.array(df).flatten() all_threads = [] #init thread for url in df_data: init_th = threading.Thread(target=get_zonghe_answers,args=(url,)) all_threads.append(init_th) for sth in all_threads: sth.start() for sth in all_threads: sth.join() pd.DataFrame(data=all_threads_ret).to_csv('zonghe_answer222.csv', header=0, index= 0) print(len(all_threads)) if __name__ == "__main__": mutiprocess_thread()
eg: Queue, Producer, Consumer, 使用Threading模块创建
使用Threading模块创建线程,继承threading.Thread,然后重写__init__方法和run方法:这种方式是推荐的方式.
#encoding: utf-8 import urllib import threading from bs4 import BeautifulSoup import requests import os import time # 表情链接列表 FACE_URL_LIST = [] # 页面链接列表 PAGE_URL_LIST = [] # 构建869个页面的链接 BASE_PAGE_URL = 'https://www.doutula.com/photo/list/?page=' for x in range(1, 870): url = BASE_PAGE_URL + str(x) PAGE_URL_LIST.append(url) # 初始化锁 gLock = threading.Lock() # 生产者,负责从每个页面中提取表情的url class Producer(threading.Thread): def run(self): while len(PAGE_URL_LIST) > 0: # 在访问PAGE_URL_LIST的时候,要使用锁机制 gLock.acquire() page_url = PAGE_URL_LIST.pop() # 使用完后要及时把锁给释放,方便其他线程使用 gLock.release() response = requests.get(page_url) soup = BeautifulSoup(response.content, 'lxml') img_list = soup.find_all('img', attrs={'class': 'img-responsive lazy image_dta'}) gLock.acquire() for img in img_list: src = img['data-original'] if not src.startswith('http'): src = 'http:'+ src # 把提取到的表情url,添加到FACE_URL_LIST中 FACE_URL_LIST.append(src) gLock.release() time.sleep(0.5) # 消费者,负责从FACE_URL_LIST提取表情链接,然后下载 class Consumer(threading.Thread): def run(self): print '%s is running' % threading.current_thread while True: # 上锁 gLock.acquire() if len(FACE_URL_LIST) == 0: # 不管什么情况,都要释放锁 gLock.release() continue else: # 从FACE_URL_LIST中提取数据 face_url = FACE_URL_LIST.pop() gLock.release() filename = face_url.split('/')[-1] path = os.path.join('images', filename) urllib.urlretrieve(face_url, filename=path) if __name__ == '__main__': # 2个生产者线程,去从页面中爬取表情链接 for x in range(2): Producer().start() # 5个消费者线程,去从FACE_URL_LIST中提取下载链接,然后下载 for x in range(5): Consumer().start()
#! /usr/bin/env python #encoding=utf-8 import threading import time from Queue import Queue def readFile(): file_object = open('/opt/dev/python/list.dat') global queue for line in file_object: queue.put(line) class Consumer(threading.Thread): def run(self): global queue while queue.qsize() > 0: msg = self.name + '消费了 '+queue.get() print msg time.sleep(0.01) queue = Queue() def main(): readFile() for i in range(5): c = Consumer() c.start() if __name__ == '__main__': main()