机器学习之路--Numpy

常用代码

ndarray.dtype:同一个 ndarray 中所有元素的数据类型必须相同

常用代码

import numpy 

#numpy读取文件 world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype=str, skip_header=1) #<class 'numpy.ndarray'> print(type(world_alcohol)) #获取帮助信息 print (help(numpy.genfromtxt)) #创建一个一维数组 (4,) vector = numpy.array([1, 2, 3, 4]) #创建一个矩阵 (3,3) matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]]) #获取矩阵的行和列数 print(matrix.shape) >>(3,3) #获取第三行第三列的值(下标从0开始) third_country = world_alcohol[2,2] #创建一个矩阵 matrix = numpy.array([ [5, 10, 15], [20, 25, 30], [35, 40, 45] ]) #获取第二列(下标为1)的所有值 print(matrix[:,1]) >>[10 25 40] #获取第二行(下标为1)的所有值 print(matrix[1,:]) >>[20 25 30] #获取第0列到第1列的所有值(切片0:2不包含下标2) print(matrix[:,0:2]) >>[[ 5 10] [20 25] [35 40]] #判断是否有该数 matrix = numpy.array([ [5, 10, 15], [20, 25, 30], [35, 40, 45] ]) #注意返回的是一个布尔数组 matrix == 25 >>array([[False, False, False], [False, True, False], [False, False, False]], dtype=bool) #根据布尔条件筛选并返回对应的行 matrix = numpy.array([ [5, 10, 15], [20, 25, 30], [35, 40, 45] ]) second_column_25 = (matrix[:,1] == 25) print second_column_25 print(matrix[second_column_25, :]) >>[False True False] [[20 25 30]] #布尔数组的逻辑运算(& 与、| 或) vector = numpy.array([5, 10, 15, 20]) equal_to_ten_and_five = (vector == 10) & (vector == 5) print equal_to_ten_and_five >>[False False False False] vector = numpy.array([5, 10, 15, 20]) equal_to_ten_or_five = (vector == 10) | (vector == 5) print equal_to_ten_or_five >>[ True True False False] vector = numpy.array([5, 10, 15, 20]) equal_to_ten_or_five = (vector == 10) | (vector == 5) vector[equal_to_ten_or_five] = 50 print(vector) >>[50 50 15 20] #dtype的转换 vector = numpy.array(["1", "2", "3"]) print (vector.dtype) print vector vector = vector.astype(float) print vector.dtype print vector >>|S1 ['1' '2' '3'] float64 [ 1. 2. 3.] 
#最小值 vector = numpy.array([5, 10, 15, 20]) vector.min() #求和 axis=1是按行求和(每行得到一个和) axis=0是按列求和(每列得到一个和) matrix = numpy.array([ [5, 10, 15], [20, 25, 30], [35, 40, 45] ]) matrix.sum(axis=1) >>array([ 30, 75, 120]) #小案例:替换文本中的nan为0 #原始数据 a,b,ce,1 ea,b4,fc,1 a,b,c, a3,b3,fc,1 ae,b2,c, af,b,c,1 #replace nan value with 0 #注意:dtype=float时,无法转换为浮点数的值(如字符串、空值)会被转为nan world_alcohol = numpy.genfromtxt("test.txt", delimiter=",",dtype=float) print (world_alcohol) #这里is_value_empty 返回的是一个布尔数组 is_value_empty = numpy.isnan(world_alcohol[:,3]) print (is_value_empty) #world_alcohol 可以用布尔数组作为索引 world_alcohol[is_value_empty, 3] = '0' alcohol_consumption = world_alcohol[:,3] alcohol_consumption = alcohol_consumption.astype(float) total_alcohol = alcohol_consumption.sum() average_alcohol = alcohol_consumption.mean() print (total_alcohol) print (average_alcohol) >> [[nan nan nan 1.] [nan nan nan 1.] [nan nan nan nan] [nan nan nan 1.] [nan nan nan nan] [nan nan nan 1.]] [False False True False True False] 4.0 0.6666666666666666 #生成数组(以下示例需先 import numpy as np) print (np.arange(15)) a = np.arange(15).reshape(3, 5) a >>[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14] array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]) #获取维度数 a.ndim >>2 #获取值类型 a.dtype.name >>'int32' #生成一个全是0的矩阵 np.zeros ((3,4)) >> array([[ 0., 0., 0., 0.], [ 0., 0., 0., 0.], [ 0., 0., 0., 0.]]) #生成形状为(2,3,4)、全是1的三维数组(即两个3行4列的矩阵) np.ones( (2,3,4), dtype=np.int32 ) >> array([[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]]) #生成一个[10, 30)区间范围(不含30)并且以5为步长的一维数组 np.arange( 10, 30, 5 ) >>array([10, 15, 20, 25]) #生成一个[0, 12)区间范围(不含12)并且以1为步长的一维数组 然后再reshape成为4行3列的矩阵 np.arange(12).reshape(4,3) >> array([[ 0, 1, 2], [ 3, 4, 5], [ 6, 7, 8], [ 9, 10, 11]]) #随机生成一个2行3列的矩阵 里面的值是在[0, 1)区间内取 np.random.random((2,3)) >> array([[ 0.54802527, 0.13235897, 0.25751953], [ 0.29272435, 0.05077192, 0.31131139]]) #生成等间距的数组 前两个参数是取值范围(包含终点) 第三个参数是元素个数 np.linspace( 0, 10, 5 ) >>array([ 0. , 2.5, 5. , 7.5, 10. 
]) #矩阵计算1 #The matrix product can be performed using the dot function or method A = np.array( [[1,1], [0,1]] ) B = np.array( [[2,0], [3,4]] ) print (A) print ('-------') print (B) print ('-------') #A B矩阵对应元素相乘(逐元素乘积) print (A*B) print ('-------') #A B矩阵之间的矩阵乘法 print (A.dot(B)) print ('-------') print (np.dot(A, B)) >> [[1 1] [0 1]] ------- [[2 0] [3 4]] ------- [[2 0] [0 4]] ------- [[5 4] [3 4]] ------- [[5 4] [3 4]] #矩阵计算2 #the product operator * operates elementwise in NumPy arrays a = np.array( [20,30,40,50] ) b = np.arange( 4 ) print (a) print (b) #b c = a-b print (c) c = c -1 print (c) b**2 print (b**2) print (a<35) >>[20 30 40 50] [0 1 2 3] [20 29 38 47] [19 28 37 46] [0 1 4 9] [ True True False False] #矩阵计算3 B = np.arange(3) print (B) #exp是以e为底的指数函数(e的x次方) print (np.exp(B)) print (np.sqrt(B)) >>[0 1 2] [ 1. 2.71828183 7.3890561 ] [ 0. 1. 1.41421356] #floor是向下取整 比如 np.floor(-1.5) >> -2.0 a = np.floor(10*np.random.random((3,4))) print (a) print ('--------') #a.shape #ravel()是将矩阵变成一维数组 print (a.ravel()) print ('--------') a.shape = (6, 2) print (a) print ('--------') #a.T是转置 print (a.T) >> [[ 6. 7. 2. 9.] [ 6. 0. 5. 2.] [ 9. 0. 9. 6.]] -------- [ 6. 7. 2. 9. 6. 0. 5. 2. 9. 0. 9. 6.] -------- [[ 6. 7.] [ 2. 9.] [ 6. 0.] [ 5. 2.] [ 9. 0.] [ 9. 6.]] -------- [[ 6. 2. 6. 5. 9. 9.] [ 7. 9. 0. 2. 0. 6.]] #合并 import numpy as np a = np.floor(10*np.random.random((2,2))) b = np.floor(10*np.random.random((2,2))) print (a) print ('---') print (b) print ('---') #垂直合并 print (np.vstack((a,b))) #水平合并 #np.hstack((a,b)) >> [[ 3. 7.] [ 2. 6.]] --- [[ 9. 6.] [ 0. 7.]] --- [[ 3. 7.] [ 2. 6.] [ 9. 6.] [ 0. 7.]] #切分 a = np.floor(10*np.random.random((2,12))) print (a) print ('---') #hsplit按列切分 第一个参数是要切分的数组 # 第二个参数是要平均切分成几份 print (np.hsplit(a,3)) print ('---') #里面(3,4)是指在第3列之后和第4列之后切开 print (np.hsplit(a,(3,4))) # Split a after the third and the fourth column a = np.floor(10*np.random.random((12,2))) print ('---') print (a) np.vsplit(a,3) >> [[ 8. 3. 3. 5. 9. 0. 1. 1. 6. 2. 7. 2.] [ 7. 1. 9. 7. 5. 2. 5. 7. 0. 
3. 1. 1.]] --- [array([[ 8., 3., 3., 5.], [ 7., 1., 9., 7.]]), array([[ 9., 0., 1., 1.], [ 5., 2., 5., 7.]]), array([[ 6., 2., 7., 2.], [ 0., 3., 1., 1.]])] --- [array([[ 8., 3., 3.], [ 7., 1., 9.]]), array([[ 5.], [ 7.]]), array([[ 9., 0., 1., 1., 6., 2., 7., 2.], [ 5., 2., 5., 7., 0., 3., 1., 1.]])] --- [[ 8. 1.] [ 3. 2.] [ 8. 0.] [ 9. 0.] [ 9. 0.] [ 5. 3.] [ 3. 3.] [ 7. 2.] [ 5. 7.] [ 9. 6.] [ 4. 0.] [ 8. 4.]] [array([[ 8., 1.], [ 3., 2.], [ 8., 0.], [ 9., 0.]]), array([[ 9., 0.], [ 5., 3.], [ 3., 3.], [ 7., 2.]]), array([[ 5., 7.], [ 9., 6.], [ 4., 0.], [ 8., 4.]])] #Python深浅拷贝 https://www.cnblogs.com/echoboy/p/9059183.html 赋值(=) 数据完全共享(a 和 b 指向同一个对象) b=[1,2,['a','b']] a=b 浅拷贝 数据半共享(只把第一层复制到独立内存,嵌套的子对象仍然共享) a=b.copy() 深拷贝 数据完全不共享(复制其数据完完全全放独立的一个内存,完全拷贝,数据不共享) import copy a=copy.deepcopy(b)

 


















原文地址:https://www.cnblogs.com/ggnbnb/p/9817230.html