eg: Error
self._target(*self._args, **self._kwargs) TypeError: get_zonghe_answers() takes 1 positional argument but 36 were given
Instead, you should provide args as a tuple:
t = threading.Thread(target=startSuggestworker, args = (start_keyword,))
http://blog.gusibi.com/post/python-thread-note/
eg: Download data
import * #看情况自己导入 all_threads_ret = [] 用于各个进程爬取的数据 def get_zonghe_answers(url): .... data = get_data_from_url(url) all_threads_ret.append(data) def mutiprocess_thread(): import threading df = pd.read_csv('zonghe.csv', header=None) df_data = np.array(df).flatten() all_threads = [] #init thread for url in df_data: init_th = threading.Thread(target=get_zonghe_answers,args=(url,)) all_threads.append(init_th) for sth in all_threads: sth.start() for sth in all_threads: sth.join() pd.DataFrame(data=all_threads_ret).to_csv('zonghe_answer222.csv', header=0, index= 0) print(len(all_threads)) if __name__ == "__main__": mutiprocess_thread()
eg: Queue, Producer, Consumer, 使用Threading模块创建
使用Threading模块创建线程,继承threading.Thread,然后重写__init__方法和run方法:这种方式是推荐的方式.
#encoding: utf-8 import urllib import threading from bs4 import BeautifulSoup import requests import os import time # 表情链接列表 FACE_URL_LIST = [] # 页面链接列表 PAGE_URL_LIST = [] # 构建869个页面的链接 BASE_PAGE_URL = 'https://www.doutula.com/photo/list/?page=' for x in range(1, 870): url = BASE_PAGE_URL + str(x) PAGE_URL_LIST.append(url) # 初始化锁 gLock = threading.Lock() # 生产者,负责从每个页面中提取表情的url class Producer(threading.Thread): def run(self): while len(PAGE_URL_LIST) > 0: # 在访问PAGE_URL_LIST的时候,要使用锁机制 gLock.acquire() page_url = PAGE_URL_LIST.pop() # 使用完后要及时把锁给释放,方便其他线程使用 gLock.release() response = requests.get(page_url) soup = BeautifulSoup(response.content, 'lxml') img_list = soup.find_all('img', attrs={'class': 'img-responsive lazy image_dta'}) gLock.acquire() for img in img_list: src = img['data-original'] if not src.startswith('http'): src = 'http:'+ src # 把提取到的表情url,添加到FACE_URL_LIST中 FACE_URL_LIST.append(src) gLock.release() time.sleep(0.5) # 消费者,负责从FACE_URL_LIST提取表情链接,然后下载 class Consumer(threading.Thread): def run(self): print '%s is running' % threading.current_thread while True: # 上锁 gLock.acquire() if len(FACE_URL_LIST) == 0: # 不管什么情况,都要释放锁 gLock.release() continue else: # 从FACE_URL_LIST中提取数据 face_url = FACE_URL_LIST.pop() gLock.release() filename = face_url.split('/')[-1] path = os.path.join('images', filename) urllib.urlretrieve(face_url, filename=path) if __name__ == '__main__': # 2个生产者线程,去从页面中爬取表情链接 for x in range(2): Producer().start() # 5个消费者线程,去从FACE_URL_LIST中提取下载链接,然后下载 for x in range(5): Consumer().start()
#! /usr/bin/env python #encoding=utf-8 import threading import time from Queue import Queue def readFile(): file_object = open('/opt/dev/python/list.dat') global queue for line in file_object: queue.put(line) class Consumer(threading.Thread): def run(self): global queue while queue.qsize() > 0: msg = self.name + '消费了 '+queue.get() print msg time.sleep(0.01) queue = Queue() def main(): readFile() for i in range(5): c = Consumer() c.start() if __name__ == '__main__': main()