PRML-master运行notebook报错问题

问题描述

使用 fetch_mldata 函数时,数据集一直下载失败:

from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')

运行时的报错信息如下:

D:\CodeSoftware\anaconda\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function fetch_mldata is deprecated; fetch_mldata was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
warnings.warn(msg, category=DeprecationWarning)
D:\CodeSoftware\anaconda\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function mldata_filename is deprecated; mldata_filename was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
warnings.warn(msg, category=DeprecationWarning)
TimeoutError Traceback (most recent call last)
D:\CodeSoftware\anaconda\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
1316 h.request(req.get_method(), req.selector, req.data, headers,
-> 1317 encode_chunked=req.has_header('Transfer-encoding'))
1318 except OSError as err: # timeout error
D:\CodeSoftware\anaconda\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
1228 """Send a complete request to the server."""
-> 1229 self._send_request(method, url, body, headers, encode_chunked)
1230
D:\CodeSoftware\anaconda\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
1274 body = _encode(body, 'body')
-> 1275 self.endheaders(body, encode_chunked=encode_chunked)
1276
D:\CodeSoftware\anaconda\lib\http\client.py in endheaders(self, message_body, encode_chunked)
1223 raise CannotSendHeader()
-> 1224 self._send_output(message_body, encode_chunked=encode_chunked)
1225
D:\CodeSoftware\anaconda\lib\http\client.py in _send_output(self, message_body, encode_chunked)
1015 del self._buffer[:]
-> 1016 self.send(msg)
1017
D:\CodeSoftware\anaconda\lib\http\client.py in send(self, data)
955 if self.auto_open:
--> 956 self.connect()
957 else:
D:\CodeSoftware\anaconda\lib\http\client.py in connect(self)
927 self.sock = self._create_connection(
--> 928 (self.host,self.port), self.timeout, self.source_address)
929 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
D:\CodeSoftware\anaconda\lib\socket.py in create_connection(address, timeout, source_address)
726 if err is not None:
--> 727 raise err
728 else:
D:\CodeSoftware\anaconda\lib\socket.py in create_connection(address, timeout, source_address)
715 sock.bind(source_address)
--> 716 sock.connect(sa)
717 # Break explicitly a reference cycle
TimeoutError: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input> in <module>
----> 1 mnist = fetch_mldata("MNIST original")
2 # from keras.datasets import mnist
3 # mnist = mnist.load_data()
4 # # Load the dataset
5 # # (X_train, _), (_, _) = mnist.load_data("E:/VSCodeWorkspace/algorithm/data/mnist.npz")
D:\CodeSoftware\anaconda\lib\site-packages\sklearn\utils\deprecation.py in wrapped(*args, **kwargs)
84 def wrapped(*args, **kwargs):
85 warnings.warn(msg, category=DeprecationWarning)
---> 86 return fun(*args, **kwargs)
87
88 wrapped.__doc__ = self._update_doc(wrapped.__doc__)
D:\CodeSoftware\anaconda\lib\site-packages\sklearn\datasets\mldata.py in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
124 urlname = MLDATA_BASE_URL % quote(dataname)
125 try:
--> 126 mldata_url = urlopen(urlname)
127 except HTTPError as e:
128 if e.code == 404:
D:\CodeSoftware\anaconda\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
D:\CodeSoftware\anaconda\lib\urllib\request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
D:\CodeSoftware\anaconda\lib\urllib\request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
D:\CodeSoftware\anaconda\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
D:\CodeSoftware\anaconda\lib\urllib\request.py in http_open(self, req)
1343
1344 def http_open(self, req):
-> 1345 return self.do_open(http.client.HTTPConnection, req)
1346
1347 http_request = AbstractHTTPHandler.do_request

D:\CodeSoftware\anaconda\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
1317 encode_chunked=req.has_header('Transfer-encoding'))
1318 except OSError as err: # timeout error
-> 1319 raise URLError(err)
1320 r = h.getresponse()
1321 except:
URLError: <urlopen error [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。>

解决

解决办法:手动将 MNIST 数据集文件(mnist-original.mat)下载下来,然后放到 scikit-learn 数据目录下的 mldata 文件夹中,这样 fetch_mldata 会直接读取本地文件而不再联网下载。

相应文件夹查询方式:

from sklearn.datasets.base import get_data_home
print (get_data_home())

例如,在我的电脑上 get_data_home() 返回 C:\Users\Administrator\scikit_learn_data,对应的数据集目录为:C:\Users\Administrator\scikit_learn_data\mldata
那么需要将下载下来的数据集 mnist-original.mat 放到这个 mldata 目录下(目录不存在时请手动创建)

问题参考

教程网址:
https://www.e-learn.cn/content/qita/1963569
https://github.com/ageron/handson-ml/issues/143

数据集下载地址:
https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat

原文地址:https://www.cnblogs.com/philokami/p/11745314.html