机器学习13—PCA学习笔记

 主成分分析PCA

机器学习实战之PCA

test13.py

#-*- coding:utf-8
import sys
sys.path.append("pca.py")

import pca
from numpy import *


dataMat = pca.loadDataSet('testSet.txt')
lowDMat, reconMat, eigVals, eigVects = pca.pca(dataMat, 1)
res = shape(lowDMat)
print("lowDMat:")
print(lowDMat)

print("reconMat:")
print(reconMat)

print("eigVals:")
print(eigVals)

print("eigVects:")
print(eigVects)


import matplotlib
import matplotlib.pyplot as plt
fig=plt.figure()
ax=fig.add_subplot(111)
#三角形表示原始数据点
ax.scatter(dataMat[:,0].flatten().A[0],dataMat[:,1].flatten().A[0], marker='^',s=90)
#圆形点表示第一主成分点,点颜色为红色
ax.scatter(reconMat[:,0].flatten().A[0],reconMat[:,1].flatten().A[0], marker='o',s=90,c='red')
plt.show()

print("over!!!")

pca.py

'''
Created on Jun 1, 2011

@author: Peter Harrington
'''
from numpy import *

def loadDataSet(fileName, delim= '	'):#delim= '     '
    fr = open(fileName)
    stringArr = [line.strip().split(delim) for line in fr.readlines()]
    datArr = [list(map(float,line)) for line in stringArr]
    return mat(datArr)

def pca(dataMat, topNfeat=4096):
    meanVals = mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals #remove mean
    covMat = cov(meanRemoved, rowvar=0)
    eigVals,eigVects = linalg.eig(mat(covMat))
    eigValInd = argsort(eigVals)            #sort, sort goes smallest to largest
    eigValInd = eigValInd[:-(topNfeat+1):-1]  #cut off unwanted dimensions
    redEigVects = eigVects[:,eigValInd]       #reorganize eig vects largest to smallest
    lowDDataMat = meanRemoved * redEigVects#transform data into new dimensions
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat, eigVals,eigVects

def replaceNanWithMean():
    datMat = loadDataSet('secom.data', ' ')
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        meanVal = mean(datMat[nonzero(~isnan(datMat[:,i].A))[0],i]) #values that are not NaN (a number)
        datMat[nonzero(isnan(datMat[:,i].A))[0],i] = meanVal  #set NaN values to mean
    return datMat
原文地址:https://www.cnblogs.com/Vae1990Silence/p/8652269.html