UCI Housing数据集
- 该模块将从 https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ 下载数据集,并将训练集和测试集解析为paddle reader creator
- 每个样本由归一化后的13个特征和对应的房价(标签)组成
paddle.dataset.uci_housing:https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/data/dataset_cn/uci_housing_cn.html
import paddle
import paddle.fluid as fluid
import numpy as np
import paddle.dataset.uci_housing as uci_housing
# uci_housing.train() / uci_housing.test() each return a reader creator.
# Calling a reader creator yields samples one at a time; per the notes recorded
# below, every sample is a 2-item pair: the normalized features and the price.
train = uci_housing.train()  # <function paddle.dataset.uci_housing.train.<locals>.reader()>
# Console output recorded on the first run, when the data file was not cached yet:
'''
[==================================================]i_housing/housing.data not found, downloading http://paddlemodels.bj.bcebos.com/uci_housing/housing.data
'''
test = uci_housing.test()  # reader creator for the test split (a function, like `train`)

# Start one pass over the training reader and take only its first sample.
train_samples = train()
a_sample = next(train_samples)

print(len(a_sample))  # 2

sample_price = a_sample[1]
print(sample_price)  # [24.]

sample_features = a_sample[0]
print(sample_features.shape)  # (13,)
print(sample_features)  # [-0.0405441 0.06636364 -0.32356227 -0.06916996 -0.03435197 0.05563625 -0.03475696 0.02682186 -0.37171335 -0.21419304 -0.33569506 0.10143217 -0.21172912]
# Inspect the names of the input features exposed by the dataset module.
# NOTE: the two lines below are bare expressions. Their values are echoed only
# in an interactive session / notebook; run as a plain script they are
# evaluated and discarded.
len(uci_housing.feature_names) # 13
uci_housing.feature_names
# Value echoed for `uci_housing.feature_names` in the recorded session:
'''
['CRIM',
'ZN',
'INDUS',
'CHAS',
'NOX',
'RM',
'AGE',
'DIS',
'RAD',
'TAX',
'PTRATIO',
'B',
'LSTAT']
'''
# Each row holds 14 values: 13 input variables (x) and 1 target variable (y).
# Shapes recorded in the original session:
#   uci_housing.UCI_TEST_DATA.shape  -> (102, 14)
#   uci_housing.UCI_TRAIN_DATA.shape -> (404, 14)
# NOTE(review): presumably these module-level arrays are only populated once
# uci_housing.train() / uci_housing.test() has been called - confirm.
# Bare expression: echoed only in an interactive session / notebook.
uci_housing.UCI_TEST_DATA
# Value echoed in the recorded session (middle columns and rows elided):
'''
array([[ 0.42616306, -0.11363636, 0.25525005, ..., -0.0686218 ,
0.40637243, 8.5 ],
[ 0.72279828, -0.11363636, 0.25525005, ..., 0.07134996,
0.28495962, 5. ],
[ 0.19222996, -0.11363636, 0.25525005, ..., 0.03415696,
0.2948934 , 11.9 ],
...,
[-0.03993221, -0.11363636, 0.02907703, ..., 0.10143217,
-0.1935172 , 23.9 ],
[-0.03938337, -0.11363636, 0.02907703, ..., 0.09273279,
-0.17033839, 22. ],
[-0.04008226, -0.11363636, 0.02907703, ..., 0.10143217,
-0.13170704, 11.9 ]])
'''