1、介绍了NumPy的基础内容，主要包括数组及其索引、数组运算、数组读/写以及常用的统计与分析方法。
# Script: load a CSV file, build a NumPy structured array from its rows, and
# print basic statistics for the "guanzhu" column.

# 1. Import modules
import csv

import numpy as np

# 2. Load the data
# newline='' is what the csv module asks for when opening files, so that
# newlines embedded inside quoted fields are parsed correctly.
with open('./xiaoqu_data/1xiaoqu_shijiazhuang2.csv', 'rt', encoding='UTF-8', newline='') as csvfile:
    # csv.reader yields each CSV line as a list of strings.
    csv_reader = csv.reader(csvfile)
    # The first row holds the column titles.
    xiaoqu_header = next(csv_reader)
    # Keep every remaining row in xiaoqu_data. Blank lines come back from
    # csv.reader as [] and cannot be converted to a record below, so skip them.
    xiaoqu_data = [row for row in csv_reader if row]
# print(xiaoqu_data)

# 3. Data cleaning: drop the first column of every row
# (presumably an index number -- confirm against the CSV file).
# Rows become tuples because NumPy builds structured records from tuples.
xiaoqu_list = [tuple(row[1:]) for row in xiaoqu_data]
# print(xiaoqu_list)

# 4. Statistics
# 1) Describe one record: nine text fields, each a 40-character string
#    (NumPy truncates longer values to fit the field width).
datatype = np.dtype([("name", np.str_, 40),
                     ("guanzhu", np.str_, 40),
                     ("junjia", np.str_, 40),
                     ("leixing", np.str_, 40),
                     ("wuyefei", np.str_, 40),
                     ("company", np.str_, 40),
                     ("kaifa", np.str_, 40),
                     ("loudong", np.str_, 40),
                     ("fangwu", np.str_, 40)])
# print(datatype)

# 2) Build the structured array (one record per CSV row). The name
#    xiaoqu_data is rebound from the raw row list to the array from here on.
xiaoqu_data = np.array(xiaoqu_list, dtype=datatype)
# print(xiaoqu_data)

# 3) Convert the column under analysis from text to float.
guanzhu = xiaoqu_data["guanzhu"].astype(float)
print(guanzhu)

# 4) Sort (np.sort returns a sorted copy; guanzhu itself is unchanged).
sort1 = np.sort(guanzhu)
print(sort1)

# 5) Remove duplicates (np.unique returns the sorted unique values).
unique1 = np.unique(guanzhu)
print(unique1)

# 6) Sum, mean, standard deviation, variance, minimum and maximum of the column.
print(np.sum(guanzhu))
print(np.mean(guanzhu))
print(np.std(guanzhu))
print(np.var(guanzhu))
print(np.min(guanzhu))
print(np.max(guanzhu))