利用皮尔逊相关系数找出与目标最相关的特征(Python实现)

#coding:utf-8
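# Measures the Pearson correlation between each feature column and the
# irradiance target (the last CSV column). The code shown here does not
# compute correlations between the features themselves.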
from __future__ import division
import tensorflow as tf
import math
import csv
from sklearn import metrics
import numpy as np
from math import sqrt
from math import sqrt
from sklearn import preprocessing
# Load the CSV table into a float64 matrix and pick out the target column.
data = []
X = []
# Labels for the feature columns that get ranked later in the script.
# NOTE(review): this name shadows the builtin ``list``; it is kept because the
# rest of the script refers to the labels by this name.
list=['0','1','2','3','4','5','6']
# Alternative input file kept from the original script:
#with open('D:/辐照度数据表/day_winter.csv') as f:
# newline='' is what the csv module asks for when it is handed a file object.
with open(r'D:夏季.csv', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # the first row is a header, not data
    for row in reader:
        data.append(row[:])
# Parse every cell as float64 in one step. Converting cell by cell and storing
# the result back into a string-typed array would only re-stringify the values
# (and could truncate them to the array's fixed string width).
data = np.array(data, dtype='float64')
print("the shape of data",np.shape(data))
if data.ndim != 2:
    # An empty file (or one with only a header) yields a 1-D array, which the
    # two-value unpacking below cannot handle.
    raise ValueError(
        "expected a non-empty rectangular table, got shape %s"
        % (np.shape(data),)
    )
m,n = np.shape(data)
print("the shape of data",m,n)
# The last column is the target every feature is correlated against.
y = data[:,-1]
y1 = data[:,-1]
set2 = data[:,-1]
print("*******************************************************")
# Report file, opened in append mode. The handle stays open here because the
# ranking section further down keeps writing to it.
file = open("E:/predict_pierxun1.txt", 'a')
file.write(" "+" ")
file.write(" 雨天天气下各特征与辐照度的相关系数 "+" ")
file.write("———————————————————————————————————"+" ")

# Pearson correlation of each labelled feature column with the target:
#   r = sum((a - mean(a)) * (b - mean(b)))
#       / (sqrt(sum((a - mean(a))**2)) * sqrt(sum((b - mean(b))**2)))
jieguo1 = []  # one coefficient per feature label, in label order

# The target side of the formula is identical for every feature, so it is
# prepared once instead of on every loop iteration.
set2 = set2.astype('float64')
set2_centered = set2 - np.mean(set2)
fenmu2 = sqrt(np.sum(set2_centered ** 2))

# Only as many columns as there are labels are used; any further coefficient
# would be dropped when the results are paired with the labels below.
for i in range(len(list)):
    set1 = data[:, i].astype('float64')
    set1_centered = set1 - np.mean(set1)
    fenzi = np.sum(set1_centered * set2_centered)
    fenmu1 = sqrt(np.sum(set1_centered ** 2))
    # A constant column makes the denominator zero and the result nan.
    jieguo = fenzi / (fenmu1 * fenmu2)
    jieguo1.append(jieguo)
print("*******************************************************")
# label -> signed coefficient, and label -> absolute coefficient (for ranking).
jieguo2 = dict(zip(list, jieguo1))
jieguo3 = {label: abs(value) for label, value in zip(list, jieguo1)}
def fun(s):
    """Return the items of mapping ``s`` sorted by value, largest first.

    Args:
        s: A mapping whose values can be compared with each other.

    Returns:
        A list of ``(key, value)`` tuples in descending order of value.
        Items with equal values keep the order they have in ``s``.
    """
    d = sorted(s.items(), key=lambda t: t[1], reverse=True)
    return d
# Write and print the feature ranking, strongest absolute correlation first.
file.write("重要特征排序(按相关系数值的绝对值从大到小): "+" ")
d = fun(jieguo2)   # ranking by signed coefficient
d1 = fun(jieguo3)  # ranking by absolute coefficient; this is what is reported
# Walk over whatever the ranking contains instead of assuming exactly seven
# entries, so a shorter result list cannot raise IndexError.
for rank, (name, score) in enumerate(d1, start=1):
    print("%d. feature %s (%f)" % (rank, name, score))
    file.write(str(name)+" ")
# NOTE(review): the original indentation was lost; this trailing write is
# assumed to come after the loop rather than inside it -- confirm.
file.write(" ")
# Last write to the report: close the handle so buffered text reaches disk.
file.close()
————————————————
版权声明:本文为CSDN博主「simple_hututu」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/pwtd_huran/article/details/79729029

原文地址:https://www.cnblogs.com/wcxia1985/p/14807547.html