import urllib2
在 Python 3 中,urllib2 已被拆分并入 urllib.request(以及 urllib.error),应使用 urllib.request 代替 urllib2:
import urllib.request as urllib2
import cookielib
Python3中,import cookielib改成 import http.cookiejar
import http.cookiejar as cookielib
from urlparse import urlparse
from urllib.parse import urlparse
PermissionError: [WinError 5] 拒绝访问
这是在 Windows 系统上使用 multiprocessing 进行进程间通信时出现的问题。参阅:https://blog.csdn.net/m0_37422289/article/details/80186288
原代码:
# Original (failing) example: a BaseManager server whose registered callables
# are lambdas. As the surrounding notes explain, on Windows the manager's
# state must be pickled for the child process and lambdas cannot be pickled,
# so starting the manager fails. The lambdas are kept on purpose here.
import queue
from multiprocessing import freeze_support
from multiprocessing.managers import BaseManager

task_number = 1
task_queue = queue.Queue(task_number)
result_queue = queue.Queue(task_number)

SERVER_ADDRESS = ('127.0.0.1', 8001)
# Python 3 requires the auth key to be bytes; a str raises TypeError in the
# BaseManager constructor before the pickling problem can even be reproduced.
AUTH_KEY = b'123'


def win_run():
    """Register the task/result queues on BaseManager and start the server.

    This is the intentionally broken variant: the registered callables are
    lambdas, which is what triggers the failure discussed in these notes.
    """
    BaseManager.register('task', callable=lambda: task_queue)
    BaseManager.register('result', callable=lambda: result_queue)
    manager = BaseManager(address=SERVER_ADDRESS, authkey=AUTH_KEY)
    manager.start()


if __name__ == "__main__":
    # freeze_support() is required for frozen Windows executables and is
    # harmless otherwise.
    freeze_support()
    win_run()
问题探讨:
在Unix/Linux下,multiprocessing模块封装了fork()调用。
Windows没有fork调用,因此,multiprocessing需要“模拟”出fork的效果,父进程所有Python对象都必须通过pickle序列化再传到子进程去。
pickle 不支持序列化匿名函数(lambda),因此在 Windows 上创建进程会失败。
解决方案:
修改匿名函数为普通函数
为了实现windows平台对于python多进程实现的要求,并区分是自身运行还是被调用导入而运行,加入if __name__的判断。参阅:https://blog.csdn.net/qq_27017791/article/details/80212016
现代码:
# Fixed example: the callables registered on BaseManager are ordinary
# module-level functions instead of lambdas, so they can be pickled when the
# manager process is created on Windows.
import queue
from multiprocessing import freeze_support
from multiprocessing.managers import BaseManager

task_number = 1
task1 = queue.Queue(task_number)
result1 = queue.Queue(task_number)

SERVER_ADDRESS = ('127.0.0.1', 8001)
# Python 3 requires the auth key to be bytes; passing a str raises TypeError
# in the BaseManager constructor.
AUTH_KEY = b'123'


def task_queue():
    """Return the module-level task queue (picklable replacement for a lambda)."""
    return task1


def result_queue():
    """Return the module-level result queue (picklable replacement for a lambda)."""
    return result1


def win_run():
    """Register the task/result queue accessors and start the manager server."""
    BaseManager.register('task', callable=task_queue)
    BaseManager.register('result', callable=result_queue)
    manager = BaseManager(address=SERVER_ADDRESS, authkey=AUTH_KEY)
    manager.start()


if __name__ == "__main__":
    # The __main__ guard keeps the spawned child (which re-imports this
    # module on Windows) from starting another manager recursively.
    freeze_support()
    win_run()
PermissionError: [WinError 5] 拒绝访问
这是在 Windows 系统下使用多进程(multiprocessing.Process)时出现的问题。
出现问题的代码部分如下:
问题出现在最后一行。
# Failing example: the queues are handed to Process as arguments. This is the
# pattern the surrounding notes identify as the cause of the error on Windows;
# it is kept on purpose so the snippet still demonstrates the problem.
# NOTE(review): block nesting was reconstructed from a flattened paste -
# confirm against the real source file.
import queue
import time
from multiprocessing import Process, freeze_support
from multiprocessing.managers import BaseManager

from BaseSpider import DataOutput
from DistributedSpider.control.UrlManager import UrlManager

url_q = queue.Queue()
result_q = queue.Queue()
store_q = queue.Queue()
conn_q = queue.Queue()


def url_manager_proc(url_q, conn_q, root_url):
    """Feed URLs from a UrlManager into ``url_q`` until the crawl should stop.

    Args:
        url_q: queue that receives URLs to crawl, then the sentinel ``'end'``.
        conn_q: queue from which batches of newly discovered URLs are read.
        root_url: first URL handed to the UrlManager.
    """
    url_manager = UrlManager()
    url_manager.add_new_url(root_url)
    while True:
        # Test with has_new_url() so that get_new_url() is called exactly
        # once per iteration; calling it in the condition as well discarded
        # one URL every time round the loop.
        while url_manager.has_new_url():
            new_url = url_manager.get_new_url()
            url_q.put(new_url)
            print(url_manager.old_url_size())
            if url_manager.old_url_size() > 2000 or not url_manager.has_new_url():
                # Tell consumers to stop, persist both URL sets, and exit.
                url_q.put('end')
                print('end')
                url_manager.save_process('new_url.txt', url_manager.new_urls)
                url_manager.save_process('old_url.txt', url_manager.old_urls)
                return
        try:
            if not conn_q.empty():
                urls = conn_q.get()
                url_manager.add_new_urls(urls)
        except Exception:
            # Best-effort back-off on queue errors. Exception (not
            # BaseException) so Ctrl-C / SystemExit can still end the loop.
            time.sleep(0.1)


if __name__ == '__main__':
    freeze_support()
    url = 'https://baike.baidu.com/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711?fr=aladdin'
    url_manager = Process(target=url_manager_proc, args=(url_q, conn_q, url,))
处理方案:参阅:https://blog.csdn.net/weixin_41935140/article/details/81153611
将创建进程时传入的、无法被 pickle 序列化的对象(此处为队列 url_q、conn_q)从参数中移除,改为在函数内部直接引用,而不是作为参数传递。
# Fixed version (fragment): the queues are no longer passed as Process
# arguments; the function uses the module-level url_q / conn_q instead.
# NOTE(review): block nesting was reconstructed from a flattened paste -
# confirm against the real source file.
# NOTE(review): with the Windows "spawn" start method the child re-imports
# this module, so module-level queue.Queue objects are presumably separate
# copies in parent and child - confirm whether a multiprocessing.Queue or a
# manager-backed queue is needed for the parent to see these items.
def url_manager_proc(root_url):
    """Feed URLs from a UrlManager into ``url_q`` until the crawl should stop.

    Args:
        root_url: first URL handed to the UrlManager.
    """
    url_manager = UrlManager()
    url_manager.add_new_url(root_url)
    while True:
        # Test with has_new_url() so that get_new_url() is called exactly
        # once per iteration; calling it in the condition as well discarded
        # one URL every time round the loop.
        while url_manager.has_new_url():
            new_url = url_manager.get_new_url()
            url_q.put(new_url)
            print(url_manager.old_url_size())
            if url_manager.old_url_size() > 2000 or not url_manager.has_new_url():
                # Tell consumers to stop, persist both URL sets, and exit.
                url_q.put('end')
                print('end')
                url_manager.save_process('new_url.txt', url_manager.new_urls)
                url_manager.save_process('old_url.txt', url_manager.old_urls)
                return
        try:
            if not conn_q.empty():
                urls = conn_q.get()
                url_manager.add_new_urls(urls)
        except Exception:
            # Best-effort back-off on queue errors. Exception (not
            # BaseException) so Ctrl-C / SystemExit can still end the loop.
            time.sleep(0.1)


url = 'https://baike.baidu.com/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711?fr=aladdin'
url_manager = Process(target=url_manager_proc, args=(url,))
import cPickle
源地址:https://blog.csdn.net/zcf1784266476/article/details/70655192
import pickle
TypeError: a bytes-like object is required, not 'str'
Python 3 中以二进制方式写入时需要 bytes:存储前先用 str.encode() 把字符串编码为 bytes。