numpy---(精简)

numpy get started

导入numpy库, 并查看版本

import numpy as np
np.__version__

'1.14.3'

# pyplot显示画图, 数据分析与可视化
import matplotlib.pyplot as plt
lena = plt.imread('lena.jpg')
# type(lena)
lena

array([[[225, 138, 128],
        [224, 137, 127],
        [223, 136, 126],
        ...,
        [234, 146, 126],
        [220, 129, 110],
        [197, 104,  86]],

       [[222, 138, 127],
        [224, 137, 127],
        [224, 137, 127],
        ...,
        [235, 150, 130],
        [218, 131, 112],
        [189, 102,  83]],

       [[222, 138, 127],
        [224, 137, 127],
        [225, 138, 128],
        ...,
        [230, 148, 127],
        [215, 133, 112],
        [190, 105,  85]],

       ...,

       [[ 82,  21,  55],
        [ 81,  20,  54],
        [ 92,  28,  62],
        ...,
        [175,  71,  82],
        [175,  68,  78],
        [175,  65,  74]],

       [[ 80,  18,  55],
        [ 81,  20,  54],
        [ 94,  33,  67],
        ...,
        [177,  69,  82],
        [182,  70,  82],
        [183,  72,  81]],

       [[ 81,  19,  56],
        [ 83,  21,  58],
        [ 96,  35,  69],
        ...,
        [178,  68,  81],
        [183,  71,  83],
        [188,  74,  84]]], dtype=uint8)

lena2 = lena - 10
plt.imshow(lena2)
plt.show()

png

创建ndarray

使用np.array()由python list创建

numpy默认ndarray的所有元素的类型是相同的
如果传递的列表中包含不同的类型, 则统一为同一类型, 优先级:str > float > int

n1 = np.array([3, 1, 4, 5])
n1

array([3, 1, 4, 5])

n2 = np.array([[2, 3, 4, 5], [4, 6, 1, 9], [5, 6, 7, 8]])
n2

array([[2, 3, 4, 5],
       [4, 6, 1, 9],
       [5, 6, 7, 8]])

type(n2)
# shape是属性,不是方法
n2.shape

(3, 4)

n1.shape

(4,)

# 行, 列, 维度
# 一张二维彩色图片转化成数组为三维数组(高, 宽, 颜色通道)
lena.shape

(512, 512, 3)

n3 = np.array(['ABC', 1, 3.14])
n3

array(['ABC', '1', '3.14'], dtype='<U4')

使用np.routines函数创建

np.ones(shape=(10, 8), dtype=int)

array([[1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1]])

ones = np.ones(shape=(100, 80, 3), dtype=float)
plt.imshow(ones)
plt.show()

png

np.zeros((4, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

np.full((10, 10), fill_value=1024)

array([[1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024],
       [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024]])

# np.eye(N, M=None, k=0, dtype=<class 'float'>, order='C')
# 对角线为1, 其他位置为0, 满秩矩阵
np.eye(10)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

#  np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)
np.linspace(0, 100, 20)

array([  0.        ,   5.26315789,  10.52631579,  15.78947368,
        21.05263158,  26.31578947,  31.57894737,  36.84210526,
        42.10526316,  47.36842105,  52.63157895,  57.89473684,
        63.15789474,  68.42105263,  73.68421053,  78.94736842,
        84.21052632,  89.47368421,  94.73684211, 100.        ])

# np.arange([start,] stop[, step,], dtype=None)
# 左闭右开
np.arange(0, 100, 5)

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80,
       85, 90, 95])

# np.random.randint(low, high=None, size=None, dtype='l')
np.random.randint(0, 150, 5)

array([127, 110,  56,  63,  77])

# np.random.randn(d0, d1, ..., dn)
# 标准正态分布
np.random.randn(100)

array([-0.41595026,  1.47042723,  0.03288821,  0.85004019,  0.7950821 ,
        0.13841712,  0.28218393,  1.22280226, -0.55662926, -0.85044176,
        0.87454005, -1.56832096,  1.69536713,  0.12126746,  1.05180469,
        0.78131875, -0.70417438, -0.58430437, -0.49943889,  0.2231934 ,
       -0.55686039, -0.48619634,  0.48127741,  0.27138361,  0.17976988,
       -0.11101901, -0.71860609,  1.2858034 ,  0.26501713,  0.15528386,
       -0.3639874 , -0.50213498, -1.3080041 , -2.35717083, -0.648195  ,
        0.24412035, -1.50979317,  1.09656183,  0.00946873, -0.73389828,
       -0.04357452,  0.80743789, -1.72143062,  0.10460993,  1.32929205,
        0.06736016, -1.56802382,  0.90329101, -0.45505224,  0.04915999,
        0.26430304, -0.40411427,  0.42802756, -1.69807546,  0.27891151,
        1.05498128,  0.94090423, -0.42022151,  1.65546614, -0.22287079,
        0.69203073,  0.96486237, -1.28087795,  0.75158138, -0.18673762,
       -0.70781096, -1.71156378,  0.65202125, -0.3525935 ,  0.4323014 ,
       -0.63716862, -0.67085324, -0.30546365,  0.39392657, -2.13986037,
       -0.0085726 , -1.67360167,  1.84832111,  0.0671747 ,  0.01600444,
        0.52551343, -0.60296408, -0.47100002, -2.18264449,  0.46744126,
        0.72398992, -1.71408793,  0.14587077, -0.18404951, -0.80683105,
        1.73309297,  0.35799329,  0.73527189,  0.13199485,  0.26461892,
        0.54344243,  0.49003007,  0.21602823, -1.22451068, -0.21714807])

# np.random.normal(loc=0.0, scale=1.0, size=None)
# scale波动
np.random.normal(loc=175, scale=1, size=100)

array([176.21310971, 172.20006366, 175.29247008, 173.66475082,
       173.68890116, 174.71321419, 175.80304124, 175.10018316,
       173.44016299, 174.80136342, 175.37660695, 177.19022468,
       176.32440094, 173.14755284, 175.06826748, 176.42310704,
       174.76973001, 172.07888002, 174.81805161, 175.85111712,
       176.1612796 , 175.9304326 , 174.08051939, 174.2482614 ,
       172.47992484, 174.73893155, 173.8072    , 173.57799107,
       173.78284387, 176.32936172, 175.17084547, 173.21273207,
       175.28091245, 174.47728685, 174.24929528, 174.37795464,
       173.35172255, 175.33469387, 174.38263904, 176.28884503,
       174.48028776, 176.07224738, 175.28880278, 177.13037103,
       171.68068476, 174.58779908, 177.3445544 , 174.96102577,
       173.9927033 , 174.81596921, 173.40709395, 175.09461029,
       174.56116781, 176.10069031, 177.34382616, 176.63857035,
       175.29170695, 173.77097116, 173.92263266, 177.1159495 ,
       175.33183934, 175.41897696, 174.01483045, 175.26064743,
       174.52707392, 174.71789507, 175.83135718, 175.3980088 ,
       175.28031481, 176.63722956, 176.14911054, 174.1617964 ,
       174.12355257, 175.97611042, 175.4970436 , 176.42210635,
       173.54120183, 174.25305399, 172.89636185, 175.76694058,
       172.44363816, 172.97763963, 173.76208303, 175.68367144,
       174.39331671, 174.26906247, 173.97178951, 174.34262788,
       174.78171771, 176.3154983 , 175.18898772, 175.03515302,
       175.01803086, 175.89679058, 174.81759265, 174.66847045,
       175.67714752, 173.83397302, 172.11278424, 174.91772609])

# 生成0到1的随机数, 左闭右开
# 使用随机数生成一张图片
r = np.random.random(size=(200, 300, 3))
plt.imshow(r)
plt.show()

png

ndarray的属性

4个必记属性:
ndim: 维度
shape: 形状(各维度的长度)
size: 元素总个数
dtype: 元素类型

ndarray的基本操作

索引

# 二维数组索引
n5 = np.random.randint(0, 100, (3, 4))
n5

array([[44, 50, 39, 56],
       [29, 50, 49, 95],
       [11, 20, 97, 73]])

n5[0, 1]

# 三维数组索引
n6 = np.random.randint(0, 100, (3, 4, 5))
n6

array([[[83, 35, 84, 88, 18],
        [62, 37, 55, 65,  8],
        [26, 86, 50, 11, 37],
        [37, 93,  1, 86, 71]],

       [[33, 25, 72, 13, 82],
        [80, 36, 69, 37, 32],
        [43, 79, 40,  3, 46],
        [67, 10, 79, 98, 58]],

       [[44, 36, 89, 64, 86],
        [82,  9, 37, 33, 13],
        [59, 55, 45, 59, 29],
        [72, 68, 88, 23, 64]]])

# 可以看成一维和二维组合
n6[0, 3, 1]

切片

n7 = np.random.randint(150, size=10)
n7

array([ 12, 144, 141, 103,  82, 119,  85,  83,  36,  45])

# 和python list一样  左闭右开
# 一维数组切片
n7[0:5]

array([ 12, 144, 141, 103,  82])

n6.shape

(3, 4, 5)

n6

array([[[83, 35, 84, 88, 18],
        [62, 37, 55, 65,  8],
        [26, 86, 50, 11, 37],
        [37, 93,  1, 86, 71]],

       [[33, 25, 72, 13, 82],
        [80, 36, 69, 37, 32],
        [43, 79, 40,  3, 46],
        [67, 10, 79, 98, 58]],

       [[44, 36, 89, 64, 86],
        [82,  9, 37, 33, 13],
        [59, 55, 45, 59, 29],
        [72, 68, 88, 23, 64]]])

# 三维数组切片
n6[0:2]

array([[[83, 35, 84, 88, 18],
        [62, 37, 55, 65,  8],
        [26, 86, 50, 11, 37],
        [37, 93,  1, 86, 71]],

       [[33, 25, 72, 13, 82],
        [80, 36, 69, 37, 32],
        [43, 79, 40,  3, 46],
        [67, 10, 79, 98, 58]]])

n6[0:2, 1:3]

array([[[62, 37, 55, 65,  8],
        [26, 86, 50, 11, 37]],

       [[80, 36, 69, 37, 32],
        [43, 79, 40,  3, 46]]])

n6[0:2, 1:3, -2:]

array([[[65,  8],
        [11, 37]],

       [[37, 32],
        [ 3, 46]]])

# 将数据反转
n8 = np.arange(0, 10, 1)
n8

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

n8[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

n8[::-2]

array([9, 7, 5, 3, 1])

变形reshape

使用reshape函数, 注意参数是一个tuple

# a.reshape(shape, order='C')
n8.reshape((5, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

# 对图片进行reshape
lena.reshape(512 * 512 * 3)

array([225, 138, 128, ..., 188,  74,  84], dtype=uint8)

# 传入-1时, 该维度的长度由numpy自动推断; reshape(-1)直接得到一维数组
lena.reshape(-1)

array([225, 138, 128, ..., 188,  74,  84], dtype=uint8)

级联

np.concatenate() 级联需要注意的事项:
1. 级联的参数是列表, 一定要加中括号或小括号
2. 维度必须相同
3. 形状相符
4. 级联的方向默认是shape元组的第一个值代表的维度方向
5. 可以通过axis参数改变级联的方向

import numpy as np
n9 = np.random.randint(0, 10, size=(5, 5))
n9

array([[1, 4, 8, 8, 1],
       [0, 0, 8, 4, 0],
       [3, 1, 2, 3, 5],
       [9, 6, 5, 8, 0],
       [3, 6, 1, 7, 2]])

# np.concatenate((a1, a2, ...), axis=0, out=None)
np.concatenate((n9, n9))

array([[1, 4, 8, 8, 1],
       [0, 0, 8, 4, 0],
       [3, 1, 2, 3, 5],
       [9, 6, 5, 8, 0],
       [3, 6, 1, 7, 2],
       [1, 4, 8, 8, 1],
       [0, 0, 8, 4, 0],
       [3, 1, 2, 3, 5],
       [9, 6, 5, 8, 0],
       [3, 6, 1, 7, 2]])

n9.ndim

np.concatenate((n9, n9), axis=1)

array([[1, 4, 8, 8, 1, 1, 4, 8, 8, 1],
       [0, 0, 8, 4, 0, 0, 0, 8, 4, 0],
       [3, 1, 2, 3, 5, 3, 1, 2, 3, 5],
       [9, 6, 5, 8, 0, 9, 6, 5, 8, 0],
       [3, 6, 1, 7, 2, 3, 6, 1, 7, 2]])

import matplotlib.pyplot as plt
lena = plt.imread('lena.jpg')
lenas = np.concatenate((lena, lena))
plt.imshow(lenas)
plt.show()

png

np.hstack与np.vstack

水平级联与垂直级联, 处理自己, 进行维度的变更

# vertical 垂直
n10 = np.random.randint(150, size=10)
n10

array([138, 122,  78, 145,  95, 114,  98,  49, 137, 147])

n11 = np.vstack(n10)
n11

array([[138],
       [122],
       [ 78],
       [145],
       [ 95],
       [114],
       [ 98],
       [ 49],
       [137],
       [147]])

n12 = np.array([[2, 3, 4, 5, 6]])
# 可以对二维及多维数组进行降维
np.hstack(n12)

array([2, 3, 4, 5, 6])

n13 = np.array([[2, 3, 4, 5, 6], [4, 5, 6, 7, 8]])
np.hstack(n13)

array([2, 3, 4, 5, 6, 4, 5, 6, 7, 8])

np.hstack(np.hstack(lena))

array([225, 138, 128, ..., 188,  74,  84], dtype=uint8)

切分

与级联类似, 三个函数完成切分:
- np.split
- np.vsplit
- np.hsplit

# np.split(ary, indices_or_sections, axis=0)
n14 = np.random.randint(0, 150, size=(5, 7))
n14

array([[  0, 107,  40,  62, 108, 120, 130],
       [ 40,  79,  34,  48, 110,  48,  24],
       [ 52, 121,  69,  18,  88,  73,  64],
       [147,  41, 118, 138, 128,  69,  76],
       [ 67,  30,  77,  87,  10,  18,  69]])

# 第一行切 第三行切
np.split(n14, (1, 3))

[array([[  0, 107,  40,  62, 108, 120, 130]]),
 array([[ 40,  79,  34,  48, 110,  48,  24],
        [ 52, 121,  69,  18,  88,  73,  64]]),
 array([[147,  41, 118, 138, 128,  69,  76],
        [ 67,  30,  77,  87,  10,  18,  69]])]

lena3 = np.split(lena, (200, 350))[1]
plt.imshow(lena3)
plt.show()

png

# axis=0 默认, 切分行
# axis=1, 切分列
np.split(n14, (1, 3), axis=1)

[array([[  0],
        [ 40],
        [ 52],
        [147],
        [ 67]]), array([[107,  40],
        [ 79,  34],
        [121,  69],
        [ 41, 118],
        [ 30,  77]]), array([[ 62, 108, 120, 130],
        [ 48, 110,  48,  24],
        [ 18,  88,  73,  64],
        [138, 128,  69,  76],
        [ 87,  10,  18,  69]])]

# 竖直方向切分的是行
np.vsplit(n14, (1, 3))

[array([[  0, 107,  40,  62, 108, 120, 130]]),
 array([[ 40,  79,  34,  48, 110,  48,  24],
        [ 52, 121,  69,  18,  88,  73,  64]]),
 array([[147,  41, 118, 138, 128,  69,  76],
        [ 67,  30,  77,  87,  10,  18,  69]])]

# 水平方向切分的是列
np.hsplit(n14, (2, 3))

[array([[  0, 107],
        [ 40,  79],
        [ 52, 121],
        [147,  41],
        [ 67,  30]]), array([[ 40],
        [ 34],
        [ 69],
        [118],
        [ 77]]), array([[ 62, 108, 120, 130],
        [ 48, 110,  48,  24],
        [ 18,  88,  73,  64],
        [138, 128,  69,  76],
        [ 87,  10,  18,  69]])]

副本

所有赋值运算不会为ndarray的任何元素创建副本, 对赋值后的对象的操作也对原来的对象生效.

l = [1, 2, 3, 4]
n = np.array(l)
n

array([1, 2, 3, 4])

# 当数据是ndarray时, 如果用=赋值, 内存没有改变
n2 = n
n2[2] = 90
n2

array([ 1,  2, 90,  4])

array([ 1,  2, 90,  4])

# copy()函数创建副本
n3 = n.copy()
n3[0] = 80
n3

array([80,  2, 90,  4])

array([ 1,  2, 90,  4])

ndarray的聚合操作

求和 np.sum

n15.mean()

87.0625

n15.mean(axis=0)

array([[ 82.75,  85.25,  58.5 , 116.  ],
       [ 69.  ,  96.5 ,  56.  , 115.25],
       [ 95.5 ,  92.5 , 107.75, 108.5 ],
       [ 47.25,  95.5 ,  47.  , 119.75]])

import numpy as np
import matplotlib.pyplot as plt
n16 = np.random.randint(0, 150, size=(4, 4, 4))
n16

array([[[114,  88, 143, 100],
        [ 91,   7,  84,  49],
        [114,  54,  14,  20],
        [ 83,  12, 135,   1]],

       [[ 36,  95,  80,  96],
        [ 97,  98,  39, 146],
        [  2, 127,  53, 105],
        [ 71,  15,  11,  97]],

       [[  3,  64,  27,   2],
        [109,  28,  81, 123],
        [ 64,  95, 112,  66],
        [ 42, 131,  79, 123]],

       [[ 54,  31,  10, 133],
        [138,  23, 145, 122],
        [ 66,  29,  79,  97],
        [119, 139,  12, 100]]])

np.mean(n16, axis=0)

array([[ 51.75,  69.5 ,  65.  ,  82.75],
       [108.75,  39.  ,  87.25, 110.  ],
       [ 61.5 ,  76.25,  64.5 ,  72.  ],
       [ 78.75,  74.25,  59.25,  80.25]])

np.sum(n16, axis=0)

array([[207, 278, 260, 331],
       [435, 156, 349, 440],
       [246, 305, 258, 288],
       [315, 297, 237, 321]])

最大值和最小值 np.max/np.min

display(lena.max(), lena.min())

display(lena.max(axis=0), lena.min())

array([[240, 155, 139],
       [239, 152, 138],
       [240, 154, 135],
       ...,
       [244, 211, 177],
       [246, 211, 178],
       [245, 209, 179]], dtype=uint8)



0

n15 = np.random.randint(0, 150, size=(4, 4, 4))
n15

array([[[ 18,  91, 115, 148],
        [141, 145,  58, 148],
        [ 80,  97,  70,  82],
        [ 48,  47,  85, 108]],

       [[ 91,  87,  13,  93],
        [ 18,  50,   7, 145],
        [124,  74, 124, 105],
        [  7, 124,  29, 130]],

       [[146,  77,  76,  98],
        [ 34, 126,  34,  96],
        [127,  70, 148, 131],
        [ 25, 148,   2, 141]],

       [[ 76,  86,  30, 125],
        [ 83,  65, 125,  72],
        [ 51, 129,  89, 116],
        [109,  63,  72, 100]]])

n15.max()

n15.max(axis=0)

array([[146,  91, 115, 148],
       [141, 145, 125, 148],
       [127, 129, 148, 131],
       [109, 148,  85, 141]])

n15.max(axis=2)

array([[148, 148,  97, 108],
       [ 93, 145, 124, 130],
       [146, 126, 148, 148],
       [125, 125, 129, 109]])

其他聚合操作

np.std: 标准差
np.power: 幂运算
np.argmin: 最小值的索引
np.argmax: 最大值的索引
np.argwhere: 满足条件的元素的索引
np.sum和np.nansum的区别: nan not a number

n20 = np.random.randint(0, 100, size=10)
n20

array([35, 84, 96, 92, 32, 96, 76, 72, 82, 59])

np.argmin(n20)

np.argmax(n20)

np.argwhere(n20 > 70)

array([[1],
       [2],
       [3],
       [5],
       [6],
       [7],
       [8]], dtype=int64)

index = np.argwhere(n20 > 70)
n20[index]

array([[84],
       [96],
       [92],
       [96],
       [76],
       [72],
       [82]])

n20[np.array([[0], [1]])]

array([[35],
       [84]])

操作文件

使用pandas打开文件.csv 获取文件中的数据

import pandas as pd
df = pd.read_csv('../data/height.csv')
df

	order	name	height
0	1	Jay	175
1	2	JJ	175
2	3	four	168
3	4	Neng	170
4	5	Xie	165
5	6	Feet	170

df.values

array([[1, 'Jay', 175],
       [2, 'JJ', 175],
       [3, 'four', 168],
       [4, 'Neng', 170],
       [5, 'Xie', 165],
       [6, 'Feet', 170]], dtype=object)

ndarray的矩阵操作

基本矩阵操作

算术运算符

n21 = np.random.randint(0, 10, size=(4, 5))
n21

array([[4, 9, 1, 9, 9],
       [7, 7, 3, 7, 3],
       [6, 4, 2, 5, 4],
       [0, 0, 1, 4, 8]])

n21 + 10

array([[14, 19, 11, 19, 19],
       [17, 17, 13, 17, 13],
       [16, 14, 12, 15, 14],
       [10, 10, 11, 14, 18]])

n22 = n21 / 2
n22

array([[2. , 4.5, 0.5, 4.5, 4.5],
       [3.5, 3.5, 1.5, 3.5, 1.5],
       [3. , 2. , 1. , 2.5, 2. ],
       [0. , 0. , 0.5, 2. , 4. ]])

np.add(n21, n21)

array([[ 8, 18,  2, 18, 18],
       [14, 14,  6, 14,  6],
       [12,  8,  4, 10,  8],
       [ 0,  0,  2,  8, 16]])

矩阵积 np.dot()

n23 = np.random.randint(0, 10, size=(2, 3))
n24 = np.random.randint(0, 10, size=(3, 2))
display(n23, n24)

array([[2, 2, 4],
       [8, 7, 8]])



array([[3, 0],
       [2, 1],
       [3, 0]])

np.dot(n23, n24)

array([[22,  2],
       [62,  7]])

广播机制

ndarray广播机制的两条规则:
1. 为缺失的维度补1
2. 假定缺失元素用已有值填充

m = np.ones((2, 3))
a = np.arange(3)
display(m, a)

array([[1., 1., 1.],
       [1., 1., 1.]])



array([0, 1, 2])

# numpy的广播机制, 维度不对应, 自动补全
m + a

array([[1., 2., 3.],
       [1., 2., 3.]])

b = np.arange(3).reshape((3, 1))
b1 = np.arange(3)
display(b, b1)
b + b1

array([[0],
       [1],
       [2]])



array([0, 1, 2])





array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

b2 = np.ones((4, 1))
b3 = np.arange(4)
display(b2, b3)

array([[1.],
       [1.],
       [1.],
       [1.]])



array([0, 1, 2, 3])

b2 + b3

array([[1., 2., 3., 4.],
       [1., 2., 3., 4.],
       [1., 2., 3., 4.],
       [1., 2., 3., 4.]])

ndarray的排序

n31 = np.array([2, 5, 1, 7, 4])

def sortn(nd):
    """Sort an array in ascending order, in place, and return it.

    This is a simple exchange sort, not a bubble sort: for each position
    ``i`` every later element is compared with ``nd[i]`` and swapped into
    slot ``i`` whenever it is smaller, so after pass ``i`` that slot holds
    the smallest of the remaining values.

    Args:
        nd: Array exposing ``size`` and integer indexing; intended for
            1-D arrays such as ``np.array([2, 5, 1, 7, 4])``.

    Returns:
        The same object ``nd`` (mutated), sorted ascending.
    """
    n = nd.size
    for i in range(n - 1):
        # Start at i + 1: comparing nd[i] with itself can never trigger a swap.
        for j in range(i + 1, n):
            if nd[i] > nd[j]:
                nd[i], nd[j] = nd[j], nd[i]
    return nd

sortn(n31)

array([1, 2, 4, 5, 7])

# 用np.argmin代替内层Python循环(选择排序), 减少Python层面的比较与交换; 时间复杂度仍为O(n^2)
def sortnd(nd):
    """Selection-sort ``nd`` in ascending order, in place, and return it.

    On each pass the smallest value of the unsorted tail is located with
    ``np.argmin`` and swapped to the front of that tail.

    Args:
        nd: Array exposing ``size``, slicing and integer indexing;
            intended for 1-D arrays.

    Returns:
        The same object ``nd`` (mutated), sorted ascending.
    """
    total = nd.size
    start = 0
    while start < total:
        # argmin is relative to the slice nd[start:], so shift it back
        # to an index into the full array.
        offset = np.argmin(nd[start:])
        target = offset + start
        nd[start], nd[target] = nd[target], nd[start]
        start += 1
    return nd

sortnd(n31)

array([1, 2, 4, 5, 7])

快速排序

np.sort()与ndarray.sort()都可以, 但是有区别:
- np.sort()不改变输入
- ndarray.sort()本地处理, 不占用空间, 但改变输入

# a.sort(axis=-1, kind='quicksort', order=None)
n32 = np.random.randint(0, 150, size=10)
n32

array([131,  31, 101,  57,  96,  50, 142, 133,  83, 141])

# 使用ndarray.sort(), 原来的数据进行了改变, 不占内存
n32.sort()
n32

array([  0,  24,  36,  43,  85, 121, 121, 135, 138, 141])

n33 = np.sort(n32)
display(n32, n33)

array([131,  31, 101,  57,  96,  50, 142, 133,  83, 141])



array([ 31,  50,  57,  83,  96, 101, 131, 133, 141, 142])

部分排序

np.partition(a, k)

当k为正时, 结果的前k个元素是最小的k个数(这k个数内部不保证有序)
当k为负时, 结果的最后|k|个元素是最大的|k|个数(这些数内部不保证有序)

nd = np.random.randint(0, 150, size=20)
nd

array([145, 134,  88, 140,  10,  59, 132, 134,  31,  74,  91,  79,  18,
        44,  21, 140,  34,  89,  63,  26])

np.partition(nd,-5)

array([ 18,  26,  63,  34,  10,  59,  21,  44,  31,  74,  79,  91,  88,
        89, 132, 134, 134, 140, 140, 145])

np.partition(nd,5)

array([ 10,  18,  21,  26,  31,  34,  44,  59,  63,  74,  91,  79, 134,
       134, 132, 140, 140,  89,  88, 145])

作者：凯旋.Lau

出处：http://www.cnblogs.com/KX-Lau/

本文版权归作者和博客园共有，欢迎转载，但未经作者同意必须在文章页面给出原文链接，否则保留追究法律责任的权利。