python 数据分析 Numpy(Numerical Python Basic)

 # 导入numpy 模块 
 1 import numpy as np
 10 a = np.random.random((2,4))
 11 a
 12 Out[5]: 
 13 array([[0.20974732, 0.73822026, 0.82760722, 0.050551  ],
 14        [0.77337155, 0.06521922, 0.55524187, 0.59209907]])

 # 求矩阵所有数据的和，最小值，最大值
 22 np.sum(a)
 23 Out[7]: 3.812057513268513
 24 np.min(a)
 25 Out[8]: 0.05055099733013646
 26 np.max(a)
 27 Out[9]: 0.8276072194278252
 28 print("a=",a)
 29 a= [[0.20974732 0.73822026 0.82760722 0.050551  ]
 30  [0.77337155 0.06521922 0.55524187 0.59209907]]

# axis=0 表示沿着行的方向（纵向）计算，得到每一列的结果；axis=1 表示沿着列的方向（横向）计算，得到每一行的结果

 31 print("min",np.min(a))
 32 min 0.05055099733013646
#求每列当中的最小值（axis=0），以及每行当中的最小值和每行的和（axis=1）
 33 print("lmin:",np.min(a,axis=0))
 34 lmin: [0.20974732 0.06521922 0.55524187 0.050551  ]
 35 print("lmin:",np.min(a,axis=1))
 36 lmin: [0.050551   0.06521922]
 37 print("sum:",np.sum(a,axis=1))
 38 sum: [1.8261258  1.98593171]

# reshape 数据， 3行4列
 39 A = np.arange(2,14).reshape(3,4)
 40 A
 41 Out[16]: 
 42 array([[ 2,  3,  4,  5],
 43        [ 6,  7,  8,  9],
 44        [10, 11, 12, 13]])

# ndarray中最小值，最大值的序号
 45 print(np.argmin(A))
 46 0
 47 print(np.argmax(A))
 48 11
 49 print(np.mean(A))
 50 7.5
 51 print(np.average(A))
 52 7.5
 53 print(A.mean())
 54 7.5

# cumsum 累积求和：把数组展平后逐个累加，返回每一步的累加结果
 69 A
 70 Out[24]: 
 71 array([[ 2,  3,  4,  5],
 72        [ 6,  7,  8,  9],
 73        [10, 11, 12, 13]])
 81 print(A.cumsum())
 82 [ 2  5  9 14 20 27 35 44 54 65 77 90]
 83 A
 84 Out[27]: 
 85 array([[ 2,  3,  4,  5],
 86        [ 6,  7,  8,  9],
 87        [10, 11, 12, 13]])
# clip(a, a_min, a_max) 将ndarray中的数据进行判断，小于a_min的值都赋值为a_min, 大于a_max的都赋值a_max，在这之间的值不变。
 88 print(np.clip(A,5,8))
 89 [[5 5 5 5]
 90  [6 7 8 8]
 91  [8 8 8 8]]

# 查看ndarray的维数（ndim），即数组是几维的
 99 A.ndim
100 Out[30]: 2
101 A
102 Out[31]: 
103 array([[ 2,  3,  4,  5],
104        [ 6,  7,  8,  9],
105        [10, 11, 12, 13]])
106 A.ndim
107 Out[32]: 2
108 a
109 Out[33]: 
110 array([[0.20974732, 0.73822026, 0.82760722, 0.050551  ],
111        [0.77337155, 0.06521922, 0.55524187, 0.59209907]])
112 a.ndim
113 Out[34]: 2
114 A
115 Out[35]: 
116 array([[ 2,  3,  4,  5],
117        [ 6,  7,  8,  9],
118        [10, 11, 12, 13]])

  1 names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
  2 names
  3 Out[37]: array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

# 中间出了一些错误：调用random的时候需要两层括号，(7,4)要作为一个tuple传入random
 33 data = np.random.random((7,4))
 34 data
 35 Out[43]: 
 36 array([[0.89497078, 0.61138776, 0.69472434, 0.27105599],
 37        [0.23114404, 0.1423609 , 0.06016109, 0.56939826],
 38        [0.84711124, 0.00776355, 0.24954255, 0.96157959],
 39        [0.34937375, 0.6013533 , 0.66481223, 0.18210067],
 40        [0.82706912, 0.64240956, 0.95575726, 0.40232292],
 41        [0.57225917, 0.0958916 , 0.969577  , 0.47824937],
 42        [0.52181664, 0.59962513, 0.19175081, 0.92442871]])
# 注意 random/rand 和 randn 的区别：numpy.random.randn(d0, d1, …, dn) 是从标准正态分布中返回一个或多个样本值；numpy.random.rand(d0, d1, …, dn) 和 numpy.random.random(size) 的随机样本均匀分布在 [0, 1) 中，其中 random 的形状要以一个 tuple 传入。
 43 data = np.random.randn(7,4)
 44 data
 45 Out[45]: 
 46 array([[-0.41118699, -0.55989348, -1.03263407,  0.06053961],
 47        [ 0.91135901, -0.90451748, -1.12549659,  1.69668984],
 48        [ 0.54079498,  1.23213331,  0.86787185,  2.33957776],
 49        [-0.56646272,  0.87848794, -1.29842767,  0.65293394],
 50        [ 0.96861489,  1.5155331 ,  0.328894  ,  0.25768648],
 51        [-0.53991665,  0.3098865 ,  2.18921935,  0.83933456],
 52        [-1.21083646, -0.30640711,  0.36142124,  0.9664484 ]])

 58 names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
# 通过判断重新生成array
 59 names == 'Bob'
 60 Out[50]: array([ True, False, False,  True, False, False, False])
 61 data
 62 Out[51]: 
 63 array([[-0.41118699, -0.55989348, -1.03263407,  0.06053961],
 64        [ 0.91135901, -0.90451748, -1.12549659,  1.69668984],
 65        [ 0.54079498,  1.23213331,  0.86787185,  2.33957776],
 66        [-0.56646272,  0.87848794, -1.29842767,  0.65293394],
 67        [ 0.96861489,  1.5155331 ,  0.328894  ,  0.25768648],
 68        [-0.53991665,  0.3098865 ,  2.18921935,  0.83933456],
 69        [-1.21083646, -0.30640711,  0.36142124,  0.9664484 ]])
#高级用法：把 names == 'Bob' 得到的布尔数组作为索引（布尔索引），选出对应位置为 True 的行；这和切片是有区别的
 70 data[names == 'Bob']
 71 Out[52]: 
 72 array([[-0.41118699, -0.55989348, -1.03263407,  0.06053961],
 73        [-0.56646272,  0.87848794, -1.29842767,  0.65293394]])
# 切片的选择
 74 data[names == 'Bob', 2:]
 75 Out[53]: 
 76 array([[-1.03263407,  0.06053961],
 77        [-1.29842767,  0.65293394]])
 78 data[names == 'Bob', 3]
 79 Out[54]: array([0.06053961, 0.65293394])
 80 names
 81 Out[55]: array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')
 82 names == 'Joe'
 83 Out[56]: array([False,  True, False, False, False,  True,  True])
 84 names != 'Bob'
 85 Out[57]: array([False,  True,  True, False,  True,  True,  True])

#这一例程没成功：numpy 不支持用 `-` 对布尔数组取反，按报错提示应改用 `~` 运算符或 np.logical_not，例如 data[~(names == 'Bob')]
 86 data[-(names == 'Bob')]
 87 Traceback (most recent call last):
 88   File "E:SoftwareSoftwareAnaconda2.5.01envsintro_dllibsite-packagesIPythoncoreinteractiveshell.py", line 2963, in run_code
 89     exec(code_obj, self.user_global_ns, self.user_ns)
 90   File "<ipython-input-58-1242c1c7d3ed>", line 1, in <module>
 91     data[-(names == 'Bob')]
 92 TypeError: The numpy boolean negative, the `-` operator, is not supported, use the `~` operator or the logical_not function instead.
 93 data
 94 Out[59]: 
 95 array([[-0.41118699, -0.55989348, -1.03263407,  0.06053961],
 96        [ 0.91135901, -0.90451748, -1.12549659,  1.69668984],
 97        [ 0.54079498,  1.23213331,  0.86787185,  2.33957776],
 98        [-0.56646272,  0.87848794, -1.29842767,  0.65293394],
 99        [ 0.96861489,  1.5155331 ,  0.328894  ,  0.25768648],
100        [-0.53991665,  0.3098865 ,  2.18921935,  0.83933456],
101        [-1.21083646, -0.30640711,  0.36142124,  0.9664484 ]])
102 data[-(names == 'Bob')]
103 Traceback (most recent call last):
104   File "E:SoftwareSoftwareAnaconda2.5.01envsintro_dllibsite-packagesIPythoncoreinteractiveshell.py", line 2963, in run_code
105     exec(code_obj, self.user_global_ns, self.user_ns)
106   File "<ipython-input-60-1242c1c7d3ed>", line 1, in <module>
107     data[-(names == 'Bob')]
108 TypeError: The numpy boolean negative, the `-` operator, is not supported, use the `~` operator or the logical_not function instead.
109 data
110 Out[61]: 
111 array([[-0.41118699, -0.55989348, -1.03263407,  0.06053961],
112        [ 0.91135901, -0.90451748, -1.12549659,  1.69668984],
113        [ 0.54079498,  1.23213331,  0.86787185,  2.33957776],
114        [-0.56646272,  0.87848794, -1.29842767,  0.65293394],
115        [ 0.96861489,  1.5155331 ,  0.328894  ,  0.25768648],
116        [-0.53991665,  0.3098865 ,  2.18921935,  0.83933456],
117        [-1.21083646, -0.30640711,  0.36142124,  0.9664484 ]])
118 -data
119 Out[62]: 
120 array([[ 0.41118699,  0.55989348,  1.03263407, -0.06053961],
121        [-0.91135901,  0.90451748,  1.12549659, -1.69668984],
122        [-0.54079498, -1.23213331, -0.86787185, -2.33957776],
123        [ 0.56646272, -0.87848794,  1.29842767, -0.65293394],
124        [-0.96861489, -1.5155331 , -0.328894  , -0.25768648],
125        [ 0.53991665, -0.3098865 , -2.18921935, -0.83933456],
126        [ 1.21083646,  0.30640711, -0.36142124, -0.9664484 ]])
127 -data[names == 'Bob']
128 Out[63]: 
129 array([[ 0.41118699,  0.55989348,  1.03263407, -0.06053961],
130        [ 0.56646272, -0.87848794,  1.29842767, -0.65293394]])
131 data[names != 'Bob']
132 Out[64]: 
133 array([[ 0.91135901, -0.90451748, -1.12549659,  1.69668984],
134        [ 0.54079498,  1.23213331,  0.86787185,  2.33957776],
135        [ 0.96861489,  1.5155331 ,  0.328894  ,  0.25768648],
136        [-0.53991665,  0.3098865 ,  2.18921935,  0.83933456],
137        [-1.21083646, -0.30640711,  0.36142124,  0.9664484 ]])
138 data[-names != 'Bob']
139 Traceback (most recent call last):
140   File "E:SoftwareSoftwareAnaconda2.5.01envsintro_dllibsite-packagesIPythoncoreinteractiveshell.py", line 2963, in run_code
141     exec(code_obj, self.user_global_ns, self.user_ns)
142   File "<ipython-input-65-5976a92eae9b>", line 1, in <module>
143     data[-names != 'Bob']
144 TypeError: ufunc 'negative' did not contain a loop with signature matching types dtype('<U4') dtype('<U4')

#组合选择
145 msk = (names == 'Bob') | (names == 'Will')
146 msk
147 Out[67]: array([ True, False,  True,  True,  True, False, False])
148 data[msk]
149 Out[68]: 
150 array([[-0.41118699, -0.55989348, -1.03263407,  0.06053961],
151        [ 0.54079498,  1.23213331,  0.86787185,  2.33957776],
152        [-0.56646272,  0.87848794, -1.29842767,  0.65293394],
153        [ 0.96861489,  1.5155331 ,  0.328894  ,  0.25768648]])

#根据判断重新赋值：把 data 中小于 0 的元素全部置为 0
154 data[data < 0] = 0
155 data
156 Out[70]: 
157 array([[0.        , 0.        , 0.        , 0.06053961],
158        [0.91135901, 0.        , 0.        , 1.69668984],
159        [0.54079498, 1.23213331, 0.86787185, 2.33957776],
160        [0.        , 0.87848794, 0.        , 0.65293394],
161        [0.96861489, 1.5155331 , 0.328894  , 0.25768648],
162        [0.        , 0.3098865 , 2.18921935, 0.83933456],
163        [0.        , 0.        , 0.36142124, 0.9664484 ]])
164 arr = np.empty((8,4))
165 arr
166 Out[72]: 
167 array([[6.23042070e-307, 4.22795269e-307, 2.04722549e-306,
168         6.23054972e-307],
169        [1.78019761e-306, 9.34608432e-307, 7.56599807e-307,
170         8.90104239e-307],
171        [1.16820282e-307, 6.23037317e-307, 1.69121639e-306,
172         1.78020848e-306],
173        [8.90094053e-307, 1.11261027e-306, 1.11261502e-306,
174         1.42410839e-306],
175        [7.56597770e-307, 6.23059726e-307, 1.42419530e-306,
176         1.37961302e-306],
177        [1.29060531e-306, 1.11261570e-306, 7.56602523e-307,
178         9.34609790e-307],
179        [8.34451504e-308, 1.22383391e-307, 1.33511562e-306,
180         8.90103560e-307],
181        [1.42410974e-306, 1.00132228e-307, 1.33511969e-306,
182         2.18568966e-312]])
183 for i in range(8):
184     a[i] = i
185     
186 Traceback (most recent call last):
187   File "E:SoftwareSoftwareAnaconda2.5.01envsintro_dllibsite-packagesIPythoncoreinteractiveshell.py", line 2963, in run_code
188     exec(code_obj, self.user_global_ns, self.user_ns)
189   File "<ipython-input-73-077106ef35e3>", line 2, in <module>
190     a[i] = i
191 IndexError: index 2 is out of bounds for axis 0 with size 2

# 上面的循环误用了 a（只有2行）而不是 arr，所以报 IndexError。arr[i] 取出的是第 i 行，把一个标量赋给 arr[i] 会通过广播把整行的每个元素都改成该值，即整行赋值
192 for i in range(8):
193     arr[i] = i
194     
195 arr
196 Out[75]: 
197 array([[0., 0., 0., 0.],
198        [1., 1., 1., 1.],
199        [2., 2., 2., 2.],
200        [3., 3., 3., 3.],
201        [4., 4., 4., 4.],
202        [5., 5., 5., 5.],
203        [6., 6., 6., 6.],
204        [7., 7., 7., 7.]])
205 arr[0] = 9
206 arr
207 Out[77]: 
208 array([[9., 9., 9., 9.],
209        [1., 1., 1., 1.],
210        [2., 2., 2., 2.],
211        [3., 3., 3., 3.],
212        [4., 4., 4., 4.],
213        [5., 5., 5., 5.],
214        [6., 6., 6., 6.],
215        [7., 7., 7., 7.]])
216 arr[[4,3,0,6]]
217 Out[78]: 
218 array([[4., 4., 4., 4.],
219        [3., 3., 3., 3.],
220        [9., 9., 9., 9.],
221        [6., 6., 6., 6.]])
222 arr = np.arange(15).reshape((3,5))
223 arr
224 Out[80]: 
225 array([[ 0,  1,  2,  3,  4],
226        [ 5,  6,  7,  8,  9],
227        [10, 11, 12, 13, 14]])

# .T转置矩阵
228 arr.T
229 Out[81]: 
230 array([[ 0,  5, 10],
231        [ 1,  6, 11],
232        [ 2,  7, 12],
233        [ 3,  8, 13],
234        [ 4,  9, 14]])

#np.dot 矩阵相乘
235 np.dot(arr.T, arr)
236 Out[82]: 
237 array([[125, 140, 155, 170, 185],
238        [140, 158, 176, 194, 212],
239        [155, 176, 197, 218, 239],
240        [170, 194, 218, 242, 266],
241        [185, 212, 239, 266, 293]])
242 arr
243 Out[83]: 
244 array([[ 0,  1,  2,  3,  4],
245        [ 5,  6,  7,  8,  9],
246        [10, 11, 12, 13, 14]])
247 arr.T
248 Out[84]: 
249 array([[ 0,  5, 10],
250        [ 1,  6, 11],
251        [ 2,  7, 12],
252        [ 3,  8, 13],
253        [ 4,  9, 14]])
254 np.dot(arr.T, arr)
255 Out[85]: 
256 array([[125, 140, 155, 170, 185],
257        [140, 158, 176, 194, 212],
258        [155, 176, 197, 218, 239],
259        [170, 194, 218, 242, 266],
260        [185, 212, 239, 266, 293]])
261 np.dot(arr, arr.T)
262 Out[86]: 
263 array([[ 30,  80, 130],
264        [ 80, 255, 430],
265        [130, 430, 730]])
266 arr = np.arange(10)

#逐元素开平方（np.sqrt），以及逐元素求指数（np.exp）
267 np.sqrt(arr)
268 Out[88]: 
269 array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
270        2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])
271 np.exp(arr)
272 Out[89]: 
273 array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
274        5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
275        2.98095799e+03, 8.10308393e+03])
276 x = np.random.randn(8)
277 y = np.random.randn(8)
278 x
279 Out[92]: 
280 array([-0.80864713, -1.10307828,  0.39407346, -1.51956716, -0.69376606,
281        -0.5599136 ,  0.37168709, -0.3947183 ])
282 y
283 Out[93]: 
284 array([ 1.49291073, -0.30018043, -0.1632179 , -0.53365993,  2.48673945,
285        -0.72669644, -0.18439522,  2.03956463])

#两个数组逐元素比较，取每个位置上较大的值
293 np.maximum(x, y)
294 Out[95]: 
295 array([ 1.49291073, -0.30018043,  0.39407346, -0.53365993,  2.48673945,
296        -0.5599136 ,  0.37168709,  2.03956463])
297 arr = np.random.randn(7)

#数组与标量相乘（每个元素都乘以5）
298 arr*5
299 Out[97]: 
300 array([-9.09778567, -1.2577255 ,  2.85527111, -1.10915396, -3.61125732,
301         4.83669313,  0.49764244])
302 arr
303 Out[98]: 
304 array([-1.81955713, -0.2515451 ,  0.57105422, -0.22183079, -0.72225146,
305         0.96733863,  0.09952849])
306 arr = np.random.randn(7) * 5
307 arr
308 Out[100]: 
309 array([ 2.02351861,  6.79384776, -5.29035855,  4.15965833,  7.93557854,
310        -1.93563595,  1.45949827])
#np.modf是个神奇的函数，把数组拆成小数部分和整数部分，以元组形式分别返回两个不同的数组
311 np.modf(arr)
312 Out[101]: 
313 (array([ 0.02351861,  0.79384776, -0.29035855,  0.15965833,  0.93557854,
314         -0.93563595,  0.45949827]), array([ 2.,  6., -5.,  4.,  7., -1.,  1.]))



322 points = np.arange(-5, 5, 0.01)
323 xs, ys = np.meshgrid(points, points)
324 xs
325 Out[105]: 
326 array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
327        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
328        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
329        ...,
330        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
331        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
332        [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]])
333 ys
334 Out[106]: 
335 array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
336        [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
337        [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
338        ...,
339        [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
340        [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
341        [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])
342 import matplotlib.pyplot as plt
343 Backend TkAgg is interactive backend. Turning interactive mode on.
344 z = np.sqrt(xs**2 + ys**2)
345 z
346 Out[109]: 
347 array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
348         7.06400028],
349        [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
350         7.05692568],
351        [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
352         7.04985815],
353        ...,
354        [7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
355         7.04279774],
356        [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
357         7.04985815],
358        [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
359         7.05692568]])

#plt 的两个图像是进行叠加显示的。
360 plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
361 Out[110]: <matplotlib.colorbar.Colorbar at 0x8a3ccc0>
362 plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
363 Out[111]: Text(0.5,1,'Image plot of $\sqrt{x^2 + y^2}$ for a grid of values')
364 plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
365 Out[112]: <matplotlib.colorbar.Colorbar at 0xa894668>