机器学习十讲第六讲

降维
基于 PCA 的特征脸提取和人脸重构
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
faces = fetch_olivetti_faces()
def principal_component_analysis(X, l):
    X = X - np.mean(X, axis=0)#对原始数据进行中心化处理
    sigma = X.T.dot(X)/(len(X)-1) # 计算协方差矩阵
    a,w = np.linalg.eig(sigma) # 计算协方差矩阵的特征值和特征向量
    sorted_indx = np.argsort(-a) # 将特征向量按照特征值进行排序
    X_new = X.dot(w[:,sorted_indx[0:l]])#对数据进行降维
    return X_new,w[:,sorted_indx[0:l]],a[sorted_indx[0:l]] #返回降维后的数据、主成分、对应特征值
rndperm = np.random.permutation(len(faces.data))
plt.gray()
faces_reduced,W,lambdas = principal_component_analysis(faces.data,20)
fig = plt.figure( figsize=(18,4))
plt.gray()
for i in range(0,20):
    ax = fig.add_subplot(2,10,i+1 )
    ax.matshow(W[:,i].reshape((64,64)))
    plt.title("Face(" + str(i) + ")")
    plt.box(False) #去掉边框
    plt.axis("off")#不显示坐标轴
plt.show()
sample_indx = np.random.randint(0,len(faces.data)) #随机选择一个人脸的索引
#显示原始人脸
plt.matshow(faces.data[sample_indx].reshape((64,64)))
plt.matshow(faces.data.mean(axis=0).reshape((64,64)) + W.dot(faces_reduced[sample_indx]).reshape((64,64)))
fig = plt.figure( figsize=(20,4))
plt.gray()
ax = fig.add_subplot(2,11,1)
ax.matshow(faces.data.mean(axis=0).reshape((64,64))) #显示平均脸
for i in range(0,20):
    ax = fig.add_subplot(2,11,i+2 )
    ax.matshow(W[:,i].reshape((64,64)))
    plt.title( str(round(faces_reduced[sample_indx][i],2)) + "*Face(" + str(i) + ")")
    plt.box(False) #去掉边框
    plt.axis("off")#不显示坐标轴
plt.show()