# 科学计算库numpy (NumPy: the scientific computing library)

# 老唐数据分析机器学习 (Lao Tang: data analysis and machine learning)
# numpy1

import numpy

# Read the comma-separated file with every field kept as text.
world_alcohol = numpy.genfromtxt(
    "world_alcohol.txt",
    dtype=str,
    delimiter=",",
)
print(type(world_alcohol))
print(world_alcohol)
# Uncomment to show the help text of genfromtxt:
# print(help(numpy.genfromtxt))
'''
<class 'numpy.ndarray'>
[['Year' 'WHO region' 'Country' 'Beverage Types' 'Display Value']
 ['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ...
 ['1987' 'Africa' 'Malawi' 'Other' '0.75']
 ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
 ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]
'''

# numpy.array() accepts a list or a list of lists.
# A flat list produces a one-dimensional array (a vector):
vector = numpy.array([5, 10, 15, 20])
# A list of lists produces a two-dimensional array (a matrix):
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
print(vector, matrix, sep="\n")
'''
[ 5 10 15 20]
[[ 5 10 15]
 [20 25 30]
 [35 40 45]]
'''

# ndarray.shape gives the number of elements along each dimension.
vector = numpy.array([1, 2, 3, 4])
# For a matrix the shape is a tuple with two entries: (rows, columns).
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
])
print(vector.shape, matrix.shape, sep="\n")
'''
(4,)
(2, 3)
'''

# Every value in a NumPy array has the same data type.
# NumPy picks a suitable data type automatically when it reads data or
# converts a list to an array; the dtype attribute shows which one it chose.
numbers = numpy.array([1, 2, 3, 4])
# Bare expression: the value is only displayed in an interactive session.
numbers.dtype
'''
dtype('int32')
'''

# When a value cannot be converted to a numeric data type such as float or
# integer, NumPy stores the special value nan ("Not a Number"), i.e. missing data.
# In float output, 1.98600000e+03 is scientific notation for 1.986 * 10 ^ 3.
# NOTE: world_alcohol was read with dtype=str at this point, so the array
# displayed here holds strings and shows neither nan nor scientific notation.
world_alcohol
'''
array([['Year', 'WHO region', 'Country', 'Beverage Types',
        'Display Value'],
       ['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
       ...,
       ['1987', 'Africa', 'Malawi', 'Other', '0.75'],
       ['1989', 'Americas', 'Bahamas', 'Wine', '1.5'],
       ['1985', 'Africa', 'Malawi', 'Spirits', '0.31']], dtype='<U52')
'''

# Read the file again as unicode strings, this time leaving out the header line.
world_alcohol = numpy.genfromtxt(
    "world_alcohol.txt",
    skip_header=1,
    dtype="U75",
    delimiter=",",
)
print(world_alcohol)
'''
[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ...
 ['1987' 'Africa' 'Malawi' 'Other' '0.75']
 ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
 ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]
'''

# Index [row, column]: row 1, column 4 is the last field of the second data row.
uruguay_other_1986 = world_alcohol[1, 4]
# Row 2, column 2 is the third field of the third data row.
third_country = world_alcohol[2, 2]
print(uruguay_other_1986, third_country, sep="\n")
'''
0.5
Cte d'Ivoire
'''

vector = numpy.array([5, 10, 15, 20])
# The slice 0:3 selects indices 0, 1 and 2; the stop index is excluded.
print(vector[:3])
'''
[ 5 10 15]
'''

matrix = numpy.array([[5, 10, 15],
                      [20, 25, 30],
                      [35, 40, 45]])
# ":" keeps every row, 1 picks the second column.
second_column = (slice(None), 1)
print(matrix[second_column])
'''
[10 25 40]
'''

matrix = numpy.array([[5, 10, 15],
                      [20, 25, 30],
                      [35, 40, 45]])
# Every row, columns 0 and 1 (the stop index 2 is excluded).
print(matrix[:, :2])
'''
[[ 5 10]
 [20 25]
 [35 40]]
'''

matrix = numpy.array([[5, 10, 15],
                      [20, 25, 30],
                      [35, 40, 45]])
# Rows 1 and 2 combined with columns 0 and 1.
print(matrix[1:, :2])
'''
[[20 25]
 [35 40]]
'''

# numpy2

import numpy
# Comparing an array with a scalar compares the scalar with each element of the array.
# The result is a Boolean array: True where the values are equal, False otherwise.
vector = numpy.array([5, 10, 15, 20])
# Bare expression: the value is only displayed in an interactive session.
vector == 10
'''
array([False,  True, False, False])
'''

matrix = numpy.array([[5, 10, 15],
                      [20, 25, 30],
                      [35, 40, 45]])
# Element-wise comparison of the whole matrix with 25
# (bare expression: only displayed in an interactive session).
matrix == 25
'''
array([[False, False, False],
       [False,  True, False],
       [False, False, False]])
'''

# vector == 10 yields the Boolean vector [False, True, False, False],
# which is stored in equal_to_ten.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = vector == 10
# Indexing with the mask keeps only the elements where the mask is True.
print(equal_to_ten, vector[equal_to_ten], sep="\n")
'''
[False  True False False]
[10]
'''

matrix = numpy.array([[5, 10, 15],
                      [20, 25, 30],
                      [35, 40, 45]])
# Mask with one entry per row: True where the second column equals 25.
second_column_25 = matrix[:, 1] == 25
print(second_column_25)
# Using the mask as a row index selects the whole matching rows.
print(matrix[second_column_25])
'''
[False  True False]
[[20 25 30]]
'''

# Several conditions can be combined; "&" is the element-wise AND.
vector = numpy.array([5, 10, 15, 20])
is_ten = vector == 10
is_five = vector == 5
# No element can be 10 and 5 at once, so every entry is False.
equal_to_ten_and_five = is_ten & is_five
print(equal_to_ten_and_five)
'''
[False False False False]
'''

vector = numpy.array([5, 10, 15, 20])
# "|" is the element-wise OR: True where the element is 5 or 10.
equal_to_ten_or_five = (vector == 5) | (vector == 10)
print(equal_to_ten_or_five)
'''
[ True  True False False]
'''

vector = numpy.array([5, 10, 15, 20])
equal_to_ten_or_five = (vector == 5) | (vector == 10)
# Assigning through the mask overwrites only the selected elements.
vector[equal_to_ten_or_five] = 50
print(vector)
'''
[50 50 15 20]
'''

matrix = numpy.array([[5, 10, 15],
                      [20, 25, 30],
                      [35, 40, 45]])
# Rows whose second column equals 25 ...
second_column_25 = (matrix[:, 1] == 25)
print(second_column_25)
# ... get the value 10 written into that second column.
matrix[second_column_25, 1] = 10
print(matrix)
'''
[False  True False]
[[ 5 10 15]
 [20 10 30]
 [35 40 45]]
'''

# ndarray.astype() returns a copy of the array converted to another data type.
vector = numpy.array(["1", "2", "3"])
print(vector.dtype, vector, sep="\n")
# Strings -> floats
vector = vector.astype(float)
print(vector.dtype, vector, sep="\n")
'''
<U1
['1' '2' '3']
float64
[1. 2. 3.]
'''

# Sum of all elements of the vector
# (bare expression: the value is only displayed in an interactive session).
vector = numpy.array([5, 10, 15, 20])
vector.sum()
'''
50
'''

# The axis argument selects the dimension along which the operation runs.
# axis=1 adds up the entries of each row (one total per row);
# axis=0 adds up the entries of each column (one total per column).
matrix = numpy.array([[5, 10, 15],
                      [20, 25, 30],
                      [35, 40, 45]])
# Bare expression: only displayed in an interactive session.
matrix.sum(axis=1)
'''
array([ 30,  75, 120])
'''

matrix = numpy.array([[5, 10, 15],
                      [20, 25, 30],
                      [35, 40, 45]])
# One total per column (bare expression: only displayed in an interactive session).
matrix.sum(axis=0)
'''
array([60, 75, 90])
'''

# Replace nan values with 0, then compute the total and the mean of column 4.
# With the default float dtype every field that is not a number becomes nan.
# skip_header=1 keeps the header line out of the array; otherwise its nan would
# be turned into 0 below and counted as one extra observation in the mean.
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", skip_header=1)
is_value_empty = numpy.isnan(world_alcohol[:, 4])
# The array holds floats, so assign a float instead of the string '0'.
world_alcohol[is_value_empty, 4] = 0.0
# Already float64: no astype(float) conversion is needed.
alcohol_consumption = world_alcohol[:, 4]
total_alcohol = alcohol_consumption.sum()
average_alcohol = alcohol_consumption.mean()
print(total_alcohol)
print(average_alcohol)
'''
1137.78
(mean over the data rows only -- re-run to refresh this value; the
1.140060120240481 recorded earlier was printed before the header row was skipped)
'''

# numpy3

import numpy as np
# arange(15) produces the integers 0..14.
print(np.arange(15))
# The same numbers arranged as 3 rows of 5 columns.
a = np.arange(15).reshape((3, 5))
# Bare expression: only displayed in an interactive session.
a
'''
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
'''

# The statements below are bare expressions: their values are only displayed
# in an interactive session.

# Length of the array along each dimension
a.shape
'''
(3, 5)
'''

# The number of axes (dimensions) of the array
a.ndim
'''
2
'''

# Name of the element data type (the integer width shown may differ by platform)
a.dtype.name
'''
'int32'
'''

# The total number of elements of the array
a.size
'''
15
'''

# Array of shape (3, 4) filled with zeros (shown as floats in the output)
np.zeros ((3,4)) 
'''
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])
'''

# Array of shape (2, 3, 4) filled with ones, with an explicit 32-bit integer type
np.ones( (2,3,4), dtype=np.int32 )
'''
array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]])
'''

# To create sequences of numbers: arange(start, stop, step), stop excluded
np.arange( 10, 30, 5 )
'''
array([10, 15, 20, 25])
'''

# The step may also be a float
np.arange( 0, 2, 0.3 )
'''
array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])
'''

# The integers 0..11 arranged as 4 rows of 3 columns
np.arange(12).reshape(4,3)
'''
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])
'''

np.random.random((2,3))  # random floats drawn uniformly from the half-open interval [0.0, 1.0)
'''
array([[0.06665873, 0.92526157, 0.42866618],
       [0.19151176, 0.79870056, 0.32145198]])
'''

from numpy import pi 
# linspace returns 100 evenly spaced samples from 0 to 2*pi, both end points included
np.linspace( 0, 2*pi, 100 )
'''
array([0.        , 0.06346652, 0.12693304, 0.19039955, 0.25386607,
       0.31733259, 0.38079911, 0.44426563, 0.50773215, 0.57119866,
       0.63466518, 0.6981317 , 0.76159822, 0.82506474, 0.88853126,
       0.95199777, 1.01546429, 1.07893081, 1.14239733, 1.20586385,
       1.26933037, 1.33279688, 1.3962634 , 1.45972992, 1.52319644,
       1.58666296, 1.65012947, 1.71359599, 1.77706251, 1.84052903,
       1.90399555, 1.96746207, 2.03092858, 2.0943951 , 2.15786162,
       2.22132814, 2.28479466, 2.34826118, 2.41172769, 2.47519421,
       2.53866073, 2.60212725, 2.66559377, 2.72906028, 2.7925268 ,
       2.85599332, 2.91945984, 2.98292636, 3.04639288, 3.10985939,
       3.17332591, 3.23679243, 3.30025895, 3.36372547, 3.42719199,
       3.4906585 , 3.55412502, 3.61759154, 3.68105806, 3.74452458,
       3.8079911 , 3.87145761, 3.93492413, 3.99839065, 4.06185717,
       4.12532369, 4.1887902 , 4.25225672, 4.31572324, 4.37918976,
       4.44265628, 4.5061228 , 4.56958931, 4.63305583, 4.69652235,
       4.75998887, 4.82345539, 4.88692191, 4.95038842, 5.01385494,
       5.07732146, 5.14078798, 5.2042545 , 5.26772102, 5.33118753,
       5.39465405, 5.45812057, 5.52158709, 5.58505361, 5.64852012,
       5.71198664, 5.77545316, 5.83891968, 5.9023862 , 5.96585272,
       6.02931923, 6.09278575, 6.15625227, 6.21971879, 6.28318531])
'''

# Element-wise sine of 100 evenly spaced samples between 0 and 2*pi
np.sin(np.linspace( 0, 2*pi, 100 ))
'''
array([ 0.00000000e+00,  6.34239197e-02,  1.26592454e-01,  1.89251244e-01,
        2.51147987e-01,  3.12033446e-01,  3.71662456e-01,  4.29794912e-01,
        4.86196736e-01,  5.40640817e-01,  5.92907929e-01,  6.42787610e-01,
        6.90079011e-01,  7.34591709e-01,  7.76146464e-01,  8.14575952e-01,
        8.49725430e-01,  8.81453363e-01,  9.09631995e-01,  9.34147860e-01,
        9.54902241e-01,  9.71811568e-01,  9.84807753e-01,  9.93838464e-01,
        9.98867339e-01,  9.99874128e-01,  9.96854776e-01,  9.89821442e-01,
        9.78802446e-01,  9.63842159e-01,  9.45000819e-01,  9.22354294e-01,
        8.95993774e-01,  8.66025404e-01,  8.32569855e-01,  7.95761841e-01,
        7.55749574e-01,  7.12694171e-01,  6.66769001e-01,  6.18158986e-01,
        5.67059864e-01,  5.13677392e-01,  4.58226522e-01,  4.00930535e-01,
        3.42020143e-01,  2.81732557e-01,  2.20310533e-01,  1.58001396e-01,
        9.50560433e-02,  3.17279335e-02, -3.17279335e-02, -9.50560433e-02,
       -1.58001396e-01, -2.20310533e-01, -2.81732557e-01, -3.42020143e-01,
       -4.00930535e-01, -4.58226522e-01, -5.13677392e-01, -5.67059864e-01,
       -6.18158986e-01, -6.66769001e-01, -7.12694171e-01, -7.55749574e-01,
       -7.95761841e-01, -8.32569855e-01, -8.66025404e-01, -8.95993774e-01,
       -9.22354294e-01, -9.45000819e-01, -9.63842159e-01, -9.78802446e-01,
       -9.89821442e-01, -9.96854776e-01, -9.99874128e-01, -9.98867339e-01,
       -9.93838464e-01, -9.84807753e-01, -9.71811568e-01, -9.54902241e-01,
       -9.34147860e-01, -9.09631995e-01, -8.81453363e-01, -8.49725430e-01,
       -8.14575952e-01, -7.76146464e-01, -7.34591709e-01, -6.90079011e-01,
       -6.42787610e-01, -5.92907929e-01, -5.40640817e-01, -4.86196736e-01,
       -4.29794912e-01, -3.71662456e-01, -3.12033446e-01, -2.51147987e-01,
       -1.89251244e-01, -1.26592454e-01, -6.34239197e-02, -2.44929360e-16])
'''

# Arithmetic and comparison operators act element by element on NumPy arrays.
a = np.array([20, 30, 40, 50])
b = np.arange(4)
# Element-wise difference
c = a - b
# Element-wise square (bare expression: only displayed in an interactive session)
b ** 2
# Element-wise comparison yields a Boolean array
print(a < 35)
'''
[ True  True False False]
'''

# The matrix product is available as the dot method or the np.dot function.
A = np.array([[1, 1], [0, 1]])
B = np.array([[2, 0], [3, 4]])
print(A, B, sep="\n")
# print(A * B) would multiply the entries position by position instead.
print(A.dot(B))  # mathematical matrix product, method form
print(np.dot(A, B))  # mathematical matrix product, function form
'''
[[1 1]
 [0 1]]
[[2 0]
 [3 4]]
[[5 4]
 [3 4]]
[[5 4]
 [3 4]]
'''

# numpy4

import numpy as np
B = np.arange(3)
# The array itself, then e**x and the square root of every element.
print(B, np.exp(B), np.sqrt(B), sep="\n")
'''
[0 1 2]
[1.         2.71828183 7.3890561 ]
[0.         1.         1.41421356]
'''

# np.floor rounds down, so a holds random whole numbers from 0 to 9 in a 3x4 array.
a = np.floor(10*np.random.random((3,4)))
print(a)
print('--------------')
print(a.shape)
print('--------------')
# ravel() flattens the array to one dimension
print(a.ravel())
print('--------------')
# Assigning to .shape reshapes the array in place
a.shape = (6, 2)
print(a)
print('--------------')
print(a.T)  # transpose
# resize() changes the array in place and returns None, so call it first
# and print the array afterwards instead of printing the return value.
a.resize((2, 6))
print(a)

# If a dimension is given as -1 in a reshaping operation, the other dimensions are automatically calculated:
# a.reshape(3, -1)
# Sample output (the values are random and differ on every run):
'''
[[1. 6. 7. 4.]
 [5. 4. 1. 0.]
 [2. 3. 9. 7.]]
--------------
(3, 4)
--------------
[1. 6. 7. 4. 5. 4. 1. 0. 2. 3. 9. 7.]
--------------
[[1. 6.]
 [7. 4.]
 [5. 4.]
 [1. 0.]
 [2. 3.]
 [9. 7.]]
--------------
[[1. 7. 5. 1. 2. 9.]
 [6. 4. 4. 0. 3. 7.]]
[[1. 6. 7. 4. 5. 4.]
 [1. 0. 2. 3. 9. 7.]]
'''

# Two random 2x2 arrays of whole numbers from 0 to 9.
a = np.floor(10 * np.random.random((2, 2)))
b = np.floor(10 * np.random.random((2, 2)))
print(a, '---', b, '---', sep="\n")
# hstack joins the arrays side by side (more columns)
print(np.hstack([a, b]))
# vstack joins them one above the other (more rows)
print(np.vstack([a, b]))
'''
[[7. 5.]
 [9. 1.]]
---
[[6. 2.]
 [4. 7.]]
---
[[7. 5. 6. 2.]
 [9. 1. 4. 7.]]
[[7. 5.]
 [9. 1.]
 [6. 2.]
 [4. 7.]]
'''

separator = '-------------'
a = np.floor(10 * np.random.random((2, 12)))
print(a)
print(separator)
# Split along the columns into three equal parts
print(np.hsplit(a, 3))
print(separator)
# Split a after the third and the fourth column
print(np.hsplit(a, (3, 4)))
a = np.floor(10 * np.random.random((12, 2)))
print(separator)
print(a)
# Split along the rows into three equal parts
# (bare expression: only displayed in an interactive session)
np.vsplit(a, 3)
'''

[[0. 8. 1. 3. 4. 7. 7. 1. 9. 8. 7. 2.]
 [4. 2. 7. 3. 9. 6. 9. 1. 7. 8. 3. 8.]]
-------------
[array([[0., 8., 1., 3.],
       [4., 2., 7., 3.]]), array([[4., 7., 7., 1.],
       [9., 6., 9., 1.]]), array([[9., 8., 7., 2.],
       [7., 8., 3., 8.]])]
-------------
[array([[0., 8., 1.],
       [4., 2., 7.]]), array([[3.],
       [3.]]), array([[4., 7., 7., 1., 9., 8., 7., 2.],
       [9., 6., 9., 1., 7., 8., 3., 8.]])]
-------------
[[9. 3.]
 [3. 5.]
 [1. 1.]
 [0. 3.]
 [6. 4.]
 [5. 6.]
 [9. 4.]
 [1. 7.]
 [6. 2.]
 [1. 6.]
 [1. 1.]
 [8. 9.]]
[array([[9., 3.],
        [3., 5.],
        [1., 1.],
        [0., 3.]]), array([[6., 4.],
        [5., 6.],
        [9., 4.],
        [1., 7.]]), array([[6., 2.],
        [1., 6.],
        [1., 1.],
        [8., 9.]])]
'''


# python三种复制 (three ways of "copying" an array: plain assignment, view, copy)

# Plain assignment copies neither the array object nor its data.
a = np.arange(12)
b = a
# b and a are two names for one and the same ndarray object ...
print(b is a)
# ... so reshaping through b is visible through a as well.
b.shape = (3, 4)
print(a.shape)
# Both names report the same object id.
print(id(a), id(b), sep="\n")
'''
True
(3, 4)
1229965715056
1229965715056
'''

# view() creates a new array object that looks at the same data as a.
c = a.view()
print(c is a)
# Reshaping the view leaves the shape of a unchanged ...
c.shape = (2, 6)
print(a.shape)
# ... but writing through the view changes the shared data, so a changes too.
c[0, 4] = 1234
print(a)
# Two distinct objects, hence two different ids.
print(id(a), id(c), sep="\n")
'''
False
(3, 4)
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
1229965715056
1229965716336
'''

# copy() duplicates both the array object and its data.
d = a.copy()
print(d is a)
# Writing to the copy has no effect on a.
d[0, 0] = 9999
print(d, a, sep="\n")
'''
False
[[9999    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
'''

# numpy5

import numpy as np
# sin(0), sin(1), ..., sin(19) arranged as 5 rows of 4 columns.
data = np.sin(np.arange(20)).reshape((5, 4))
print(data)
# For every column, the row index of its largest value.
ind = data.argmax(axis=0)
print(ind)
print(data.shape, data.shape[1], sep="\n")
# Pair each row index with its column index to pick out the column maxima.
data_max = data[ind, range(len(ind))]
print(data_max)
# Cross-check against max() (bare expression: only displayed in an interactive session).
all(data_max == data.max(axis=0))
'''
[[ 0.          0.84147098  0.90929743  0.14112001]
 [-0.7568025  -0.95892427 -0.2794155   0.6569866 ]
 [ 0.98935825  0.41211849 -0.54402111 -0.99999021]
 [-0.53657292  0.42016704  0.99060736  0.65028784]
 [-0.28790332 -0.96139749 -0.75098725  0.14987721]]
[2 0 3 1]
(5, 4)
4
[0.98935825 0.84147098 0.99060736 0.6569866 ]
True
'''

a = np.arange(0, 40, 10)
print(a)
# tile repeats a as a block: 3 times down the rows, 5 times along the columns.
repetitions = (3, 5)
b = np.tile(a, repetitions)
print(b)
'''
[ 0 10 20 30]
[[ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]
 [ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]
 [ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]]
'''

a = np.array([[4, 3, 5], [1, 2, 1]])
print(a)
print('------------')
# np.sort returns a sorted copy (each row sorted along axis 1); a stays as it was.
b = np.sort(a, axis=1)
print(b)
# ndarray.sort sorts a itself, in place.
a.sort(axis=1)
print('------------')
print(a)
a = np.array([4, 3, 1, 2])
# argsort gives the indices that put the array in sorted order.
j = np.argsort(a)
print('------------', j, '------------', sep="\n")
# Indexing with those indices yields the sorted values.
print(a[j])
'''
[[4 3 5]
 [1 2 1]]
------------
[[3 4 5]
 [1 1 2]]
------------
[[3 4 5]
 [1 1 2]]
------------
[2 3 1 0]
------------
[1 2 3 4]
'''