Python Numpy

Numpy 基础

1. Numpy 安装

pip install numpy
pip install numpy -i https://pypi.tuna.tsinghua.edu.cn/simple

2. Numpy 属性

import numpy as np

# A 2x3 integer matrix used to show the basic ndarray attributes.
array = np.array([[1, 2, 3], [4, 5, 6]])
print(array)

# Number of dimensions (axes) of the array.
dim_count = array.ndim
print("number of dim:", dim_count)

# Shape tuple: (rows, columns) for a 2-D array.
dims = array.shape
print("shape:", dims)

# Total number of elements.
element_count = array.size
print("size:", element_count)

"""
运行结果：
[[1 2 3]
 [4 5 6]]
number of dim: 2
shape: (2, 3)
size: 6
"""

3. Numpy创建array

import numpy as np

# 1-D array built from a flat Python list.
t1 = np.array([1, 2, 3, 4])
print(t1)

# 2-D array built from nested lists (one inner list per row).
rows = [[1, 2, 3, 4], [5, 6, 7, 8]]
t2 = np.array(rows)
print(t2)

# The next three arrays all share the same 3x4 shape.
grid_shape = (3, 4)

# Filled with 0.0.
t3 = np.zeros(grid_shape)
print(t3)

# Filled with 1, stored as 16-bit integers.
t4 = np.ones(grid_shape, dtype=np.int16)
print(t4)

# Allocated but not initialised: the printed values are arbitrary.
t5 = np.empty(grid_shape)
print(t5)

# 10 up to (but excluding) 20, in steps of 2.
t6 = np.arange(10, 20, 2)
print(t6)

# 0..11 laid out as 3 rows of 4.
t7 = np.arange(12).reshape((3, 4))
print(t7)

# Six evenly spaced values from 1 to 10 (both ends included), as 2 rows of 3.
t8 = np.linspace(1, 10, num=6).reshape((2, 3))
print(t8)


"""
运行结果：
[1 2 3 4]
[[1 2 3 4]
 [5 6 7 8]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[10 12 14 16 18]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 1.   2.8  4.6]
 [ 6.4  8.2 10. ]]
"""

4. Numpy的基础运算

4.1 Numpy 数据类型

类型	类型代码	说明
int8, uint8	i1, u1	有符号和无符号的8位（1字节）整型
int16, uint16	i2, u2	有符号和无符号的16位（2字节）整型
int32, uint32	i4, u4	有符号和无符号的32位（4字节）整型
int64, uint64	i8, u8	有符号和无符号的64位（8字节）整型
float16	f2	半精度浮点数
float32	f4或f	标准的单精度浮点数，与C的float兼容
float64	f8或d	标准的双精度浮点数，与C的double和Python的float对象兼容
float128	f16或g	扩展精度浮点数
complex64	c8	用两个32位浮点数表示的复数
complex128	c16	用两个64位浮点数表示的复数
complex256	c32	用两个128位浮点数表示的复数
bool	?	存储True和False值的布尔类型

4.2 numpy 数据类型操作

import numpy as np


def _show(arr):
    """Print an array followed by its Python type and its NumPy dtype."""
    print(arr, type(arr), arr.dtype)


# Choosing the dtype at creation time.
# Python's built-in float maps to float64.
t_1 = np.array(range(1, 5), dtype=float)
_show(t_1)

# 'i1' is the type code for a signed 1-byte integer (int8).
t_2 = np.array(range(1, 5), dtype='i1')
_show(t_2)

# Non-zero values become True, zeros become False.
flags = [1, 0, 1, 1, 0, 0]
t_3 = np.array(flags, dtype=bool)
_show(t_3)

# Converting an existing array: astype returns a new array with the new dtype.
t_4 = t_3.astype('int8')
_show(t_4)

"""
运行结果：
[1. 2. 3. 4.] <class 'numpy.ndarray'> float64
[1 2 3 4] <class 'numpy.ndarray'> int8
[ True False  True  True False False] <class 'numpy.ndarray'> bool
[1 0 1 1 0 0] <class 'numpy.ndarray'> int8
"""

4.3 修改浮点型的小数位数

import numpy as np
from random import random

# Ten pseudo-random floats from the standard-library generator.
samples = [random() for _ in range(10)]
t_1 = np.array(samples, dtype=float)
print(t_1, type(t_1), t_1.dtype)

# Round every element to two decimal places.
t_2 = np.round(t_1, decimals=2)
print(t_2)

"""
运行结果：
[0.44010709 0.57982965 0.87300702 0.39740862 0.30596835 0.08421772 0.58618834 0.61866253 0.41368359 0.32946455] <class 'numpy.ndarray'> float64
[0.44 0.58 0.87 0.4  0.31 0.08 0.59 0.62 0.41 0.33]
"""

4.4 实例

demo_01

import numpy as np

t1 = np.array([10, 20, 30, 40])
t2 = np.arange(4)  # [0 1 2 3]

print("原始的元素: ", t1, t2)

# Arithmetic operators work element by element on arrays of equal shape.
difference = t1 - t2
print("对应位置的元素进行相减: ", difference)

total = t1 + t2
print("对应位置的元素进行相加: ", total)

product = t1 * t2
print("对应位置的元素进行相乘: ", product)

# ** raises each element to the given power.
squares = t1 ** 2
print("t1的平方: ", squares)

# A comparison yields a boolean array: True where the element is below 3.
below_three = t2 < 3
print("布尔索引: ", below_three)


"""
运行结果：
原始的元素:  [10 20 30 40] [0 1 2 3]
对应位置的元素进行相减:  [10 19 28 37]
对应位置的元素进行相加:  [10 21 32 43]
对应位置的元素进行相乘:  [  0  20  60 120]
t1的平方:  [ 100  400  900 1600]
布尔索引:  [ True  True  True False]
"""

demo_02

import numpy as np

t1 = np.array([[1, 1], [0, 1]])
t2 = np.arange(4).reshape((2, 2))  # [[0 1] [2 3]]

print("原始的元素t1: ", t1)
print("原始的元素t2: ", t2)

# * multiplies element by element; it is NOT matrix multiplication.
elementwise = t1 * t2
print("对应位置的元素进行相乘: ", elementwise)

# Matrix product, spelled two ways: the module-level function and the method.
via_function = np.dot(t1, t2)
print("矩阵乘法方式一: ", via_function)
via_method = t1.dot(t2)
print("矩阵乘法方式二: ", via_method)

"""
运行结果：
原始的元素t1:  [[1 1]
 [0 1]]
原始的元素t2:  [[0 1]
 [2 3]]
对应位置的元素进行相乘:  [[0 1]
 [0 3]]
矩阵乘法方式一:  [[2 4]
 [2 3]]
矩阵乘法方式二:  [[2 4]
 [2 3]]

"""

demo_03

import numpy as np

# 2x4 matrix of random floats.
t1 = np.random.random((2, 4))

print(t1)

# Reductions over every element of the array.
total = np.sum(t1)
print("求和", total)
smallest = np.min(t1)
print("求最小值", smallest)
largest = np.max(t1)
print("求最大值", largest)

# Reductions along a single axis:
# axis=1 gives one result per row, axis=0 one result per column.
row_sums = np.sum(t1, axis=1)
print("自定义维度求和", row_sums)
column_minima = np.min(t1, axis=0)
print("自定义维度求最小值", column_minima)
row_maxima = np.max(t1, axis=1)
print("自定义维度求最大值", row_maxima)

"""
运行结果：
[[0.71728488 0.18311745 0.78101771 0.44276308]
 [0.59118476 0.11805874 0.49797704 0.14829068]]
求和 3.479694341207028
求最小值 0.1180587424542946
求最大值 0.7810177064485218
自定义维度求和 [2.12418312 1.35551122]
自定义维度求最小值 [0.59118476 0.11805874 0.49797704 0.14829068]
自定义维度求最大值 [0.78101771 0.59118476]
"""

demo_04

import numpy as np

# 3x4 matrix holding the integers 2..13 in row-major order.
t1 = np.arange(2, 14).reshape((3, 4))
print(t1)

# Index of the smallest element, counted over the flattened array.
print("获取最小值的索引:\n ", np.argmin(t1))

# Index of the largest element, counted over the flattened array.
print("获取最大值的索引:\n ", np.argmax(t1))

# Arithmetic mean of all elements (np.average without weights gives the same).
print("获取平均值:\n ", np.mean(t1))
print("获取平均值:\n ", np.average(t1))

# Median of all elements.
print("获取中位数:\n ", np.median(t1))

# Running total over the flattened array.
print("逐步累加:\n ", np.cumsum(t1))

# Difference between neighbouring elements within each row.
print("每两个数之间的差:\n ", np.diff(t1))

# Positions of the non-zero elements: a tuple of (row indices, column indices),
# not the values themselves.
print("找出非0的数:\n ", np.nonzero(t1))

# Sort each row in ascending order (rows here are already sorted).
print("排序:\n ", np.sort(t1))

# Transpose: rows become columns and columns become rows (two spellings).
print("将行变成列，将列变成行:\n ", np.transpose(t1))
print("将行变成列，将列变成行:\n ", t1.T)

# Clamp values into [5, 9]: anything below 5 becomes 5, anything above 9 becomes 9.
print("将矩阵中大于9的数该为9，将小于5的数改为5:\n ", np.clip(t1, 5, 9))


"""
运行结果：
[[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]
获取最小值的索引:
  0
获取最大值的索引:
  11
获取平均值:
  7.5
获取平均值:
  7.5
获取中位数:
  7.5
逐步累加:
  [ 2  5  9 14 20 27 35 44 54 65 77 90]
每两个数之间的差:
  [[1 1 1]
 [1 1 1]
 [1 1 1]]
找出非0的数:
  (array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], dtype=int64), array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], dtype=int64))
排序:
  [[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]
将行变成列，将列变成行:
  [[ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]
 [ 5  9 13]]
将行变成列，将列变成行:
  [[ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]
 [ 5  9 13]]
将矩阵中大于9的数该为9，将小于5的数改为5:
  [[5 5 5 5]
 [6 7 8 9]
 [9 9 9 9]]
"""

4.5 Numpy 中的转置

import numpy as np

# Visual divider printed between the individual results.
separator = '*' * 50

t1 = np.arange(24).reshape((4, 6))
print(t1)
print(separator)

# Way 1: transpose() turns the original rows into columns and vice versa.
print(t1.transpose())
print(separator)

# Way 2: the .T attribute.
print(t1.T)
print(separator)

# Way 3: explicitly swap axis 1 with axis 0.
print(t1.swapaxes(1, 0))

"""
运行结果：
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
**************************************************
[[ 0  6 12 18]
 [ 1  7 13 19]
 [ 2  8 14 20]
 [ 3  9 15 21]
 [ 4 10 16 22]
 [ 5 11 17 23]]
**************************************************
[[ 0  6 12 18]
 [ 1  7 13 19]
 [ 2  8 14 20]
 [ 3  9 15 21]
 [ 4 10 16 22]
 [ 5 11 17 23]]
**************************************************
[[ 0  6 12 18]
 [ 1  7 13 19]
 [ 2  8 14 20]
 [ 3  9 15 21]
 [ 4 10 16 22]
 [ 5 11 17 23]]
"""

5. Numpy索引

import numpy as np

# 1-D array: a single integer index selects one element (0-based).
t1 = np.arange(3, 15)
print(t1)
fourth_element = t1[3]
print(fourth_element)

# 2-D array: 3 rows of 4. Indices are 0-based; the printed labels count from 1.
t2 = np.arange(3, 15).reshape((3, 4))
print(t2)
# Chained [row][col] and the tuple form [row, col] address the same element.
print("获取第2行第2列的元素: ", t2[1][1])
print("获取第3行第3列的元素: ", t2[2][2])
print("获取第3行第2列的元素: ", t2[2, 1])
# A bare ':' selects everything along that axis.
print("获取第3行的所有元素: ", t2[2, :])
print("获取第2列的所有元素: ", t2[:, 1])
# NOTE: index 1 is the second row, although the label below says row 1.
print("获取第1行的第2列到第3列的元素: ", t2[1, 1:3])


# Iteration: flatten() returns a 1-D copy, .flat iterates element by element.
flattened = t2.flatten()
print(flattened)
for value in t2.flat:
    print(value)

"""
运行结果：
[ 3  4  5  6  7  8  9 10 11 12 13 14]
6
[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]
获取第2行第2列的元素:  8
获取第3行第3列的元素:  13
获取第3行第2列的元素:  12
获取第3行的所有元素:  [11 12 13 14]
获取第2列的所有元素:  [ 4  8 12]
获取第1行的第2列到第3列的元素:  [8 9]
[ 3  4  5  6  7  8  9 10 11 12 13 14]
3
4
5
6
7
8
9
10
11
12
13
14
"""

6. Numpy array合并

import numpy as np

t1 = np.array([1, 1, 1])
t2 = np.array([2, 2, 2])

# Vertical stack: each 1-D input becomes one row of the result.
t3 = np.vstack((t1, t2))
print("上下合并:\n ", t3, t3.shape)

# Horizontal stack: 1-D inputs are joined end to end into one 1-D array.
t4 = np.hstack((t1, t2))
print("左右合并:\n ", t4, t4.shape)

# Add a new axis so each 1-D array of length 3 becomes a 3x1 column.
s1 = np.array([1, 1, 1])[:, np.newaxis]
s2 = np.array([2, 2, 2])[:, np.newaxis]

# Vertical stack of two columns: one 6x1 column.
s3 = np.vstack((s1, s2))
print("上下合并:\n ", s3, s3.shape)

# Horizontal stack of two columns: a 3x2 matrix.
s4 = np.hstack((s1, s2))
print("左右合并:\n ", s4, s4.shape)

# Joining several arrays at once along a chosen axis
# (axis=0 stacks rows, axis=1 places them side by side).
s5 = np.concatenate((s1, s2, s2, s1), axis=0)
print(s5)
s6 = np.concatenate((s1, s2, s2, s1), axis=1)
print(s6)


"""
运行结果：
上下合并:
  [[1 1 1]
 [2 2 2]] (2, 3)
左右合并:
  [1 1 1 2 2 2] (6,)
上下合并:
  [[1]
 [1]
 [1]
 [2]
 [2]
 [2]] (6, 1)
左右合并:
  [[1 2]
 [1 2]
 [1 2]] (3, 2)
[[1]
 [1]
 [1]
 [2]
 [2]
 [2]
 [2]
 [2]
 [2]
 [1]
 [1]
 [1]]
[[1 2 2 1]
 [1 2 2 1]
 [1 2 2 1]]
"""

7. Numpy array分割

import numpy as np

t1 = np.arange(12).reshape((3, 4))
print(t1)

# Split along axis 0 into 3 equal groups of rows; vsplit is the shorthand.
print("横向分割\n")
print(np.split(t1, 3, axis=0))
print(np.vsplit(t1, 3))

# Split along axis 1 into 2 equal groups of columns; hsplit is the shorthand.
print("纵向分割\n")
print(np.split(t1, 2, axis=1))
print(np.hsplit(t1, 2))

# Unequal split: 4 columns cannot be divided evenly by 3, which np.split
# rejects; array_split allows it and returns pieces of 2, 1 and 1 columns.
print("不等量分割\n")
print(np.array_split(t1, 3, axis=1))


"""
运行结果：
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
横向分割

[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
纵向分割

[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]])]
[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]])]
不等量分割

[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2],
       [ 6],
       [10]]), array([[ 3],
       [ 7],
       [11]])]
"""

对于刚刚加载出来的数据,我如果只想选择其中的某一列(行)我们应该怎么做呢?

其实操作很简单,和python中列表的操作一样

import numpy as np

# 8x8 matrix holding 0..63 in row-major order.
t1 = np.arange(64).reshape(8, 8)
print("原始数据为:\n", t1)

# One row.
print("取一行数据为:\n", t1[0])

# One column.
print("取一列数据为:\n", t1[:, 2])

# Several consecutive rows (stop index excluded).
print("取多行数据为:\n", t1[1:3])

# Several consecutive columns (stop index excluded).
print("取多列数据为:\n", t1[:, 1:3])

# Non-consecutive rows, selected with a list of row indices.
print("取不连续的多行:\n", t1[[1, 3, 5]])

# Non-consecutive columns, selected with a list of column indices.
print("取不连续的多列:\n", t1[:, [1, 3, 5]])

# A single element: third row, fourth column (indices 2 and 3).
print("取指定行指定列:\n", t1[2, 3])

# A sub-matrix: third to fifth row, second to fourth column.
print("取多行多列:\n", t1[2:5, 1:4])

# Several unrelated points: the two index lists are paired up element-wise,
# giving (0, 0), (2, 1) and (2, 3).
print("取多个不相邻的点:\n", t1[[0, 2, 2], [0, 1, 3]])

"""
运行结果
原始数据为:
 [[ 0  1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14 15]
 [16 17 18 19 20 21 22 23]
 [24 25 26 27 28 29 30 31]
 [32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47]
 [48 49 50 51 52 53 54 55]
 [56 57 58 59 60 61 62 63]]
取一行数据为:
 [0 1 2 3 4 5 6 7]
取一列数据为:
 [ 2 10 18 26 34 42 50 58]
取多行数据为:
 [[ 8  9 10 11 12 13 14 15]
 [16 17 18 19 20 21 22 23]]
取多列数据为:
 [[ 1  2]
 [ 9 10]
 [17 18]
 [25 26]
 [33 34]
 [41 42]
 [49 50]
 [57 58]]
取不连续的多行:
 [[ 8  9 10 11 12 13 14 15]
 [24 25 26 27 28 29 30 31]
 [40 41 42 43 44 45 46 47]]
取不连续的多列:
 [[ 1  3  5]
 [ 9 11 13]
 [17 19 21]
 [25 27 29]
 [33 35 37]
 [41 43 45]
 [49 51 53]
 [57 59 61]]
取指定行指定列:
 19
取多行多列:
 [[17 18 19]
 [25 26 27]
 [33 34 35]]
取多个不相邻的点:
 [ 0 17 19]
"""

8. Numpy copy&deep copy

import numpy as np

# Plain assignment: c1 is just a second name for the very same array,
# so a change made through t1 is visible through c1 as well.
t1 = np.arange(4)
print(t1)
c1 = t1
t1[0] = 11
print(t1)
same_object = c1 is t1
print(same_object)
print(c1)


# copy(): c2 owns its own data, so a later change to t2 does not reach it.
t2 = np.arange(4)
c2 = t2.copy()
t2[3] = 44
print(t2)
same_object = c2 is t2
print(same_object)
print(c2)

"""
运行结果：
[0 1 2 3]
[11  1  2  3]
True
[11  1  2  3]
[ 0  1  2 44]
False
[0 1 2 3]
"""

9. Numpy读取本地数据

轴（axis）：

　　在numpy中可以理解为方向，使用0,1,2...数字表示，对于一个一维数组，只有一个0轴，对于2维数组（shape(2, 2)），有0轴和1轴，对于三维数组（shape(2, 2, 3)），有0,1,2轴

　　有了轴的概念之后我们计算会更加方便，比如计算一个2维数组的平均值，必须指定是计算哪个方向上面的数字的平均值

语法：

np.loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None)

参数解释：

参数	解释
fname	文件，字符串或产生器，可以是.gz或bz2压缩文件
dtype	数据类型，可选，csv的字符串以什么数据类型读入数组，默认float
delimiter	分隔字符串，默认是任何空白字符，读取csv文件时改为逗号
skiprows	跳过前x行，一般跳过第一行表头
usecols	读取指定的列，索引，元组类型
unpack	如果是True，读入属性将分别写入不同数组变量，False读入数据只写一个数组变量，默认False


import numpy as np

file_path = './files/demo_001.csv'

# Options shared by both reads: comma-separated integers in a UTF-8 file.
load_options = dict(dtype='int', delimiter=',', encoding='utf-8')

# Default layout: one array row per line of the file.
data = np.loadtxt(file_path, **load_options)
# unpack=True returns the transposed result: one array row per file column.
data_1 = np.loadtxt(file_path, unpack=True, **load_options)

separator = '*'*50
print(data)
print(separator)
print(data_1)
"""
运行结果：
[[  1234   3467   3478 457889]
 [  1234   3467   3478 457889]
 [  1234   3467   3478 457889]
 [  1234   3467   3478 457889]
 [  1234   3467   3478 457889]]
**************************************************
[[  1234   1234   1234   1234   1234]
 [  3467   3467   3467   3467   3467]
 [  3478   3478   3478   3478   3478]
 [457889 457889 457889 457889 457889]]
"""