【逻辑回归的特征筛选方法】

数据特征如下

使用逻辑回归的稳定性选择

import pandas as pd
import numpy as np
import pyecharts
import xlrd

# Location of the bank-loan workbook on the author's Windows machine.
# The path separators (and the "b" that followed the last one) had been
# stripped from this literal, leaving a file name that cannot exist; they are
# restored here.
# NOTE(review): the directory boundaries are reconstructed -- confirm them
# against the real folder layout before running.
file = r'F:\数据分析专用\数据分析与机器学习\bankloan.xls'
data = pd.read_excel(file)

# Columns 0-7 are used as the candidate feature matrix; column 8 is used as
# the target vector for the classifiers fitted further down.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values


from sklearn.linear_model import LogisticRegression as LR

try:
    # Deprecated in scikit-learn 0.19 and removed in 0.21; used as-is when the
    # installed version still ships it.
    from sklearn.linear_model import RandomizedLogisticRegression as RLR
except ImportError:
    class RLR(object):
        """Stand-in for the removed ``RandomizedLogisticRegression``.

        Performs stability selection: an L1-penalised logistic regression is
        fitted on many random row subsamples whose feature columns are
        randomly down-weighted, and a feature is kept when it receives a
        non-zero coefficient in a large enough share of those fits.

        Only ``fit``, ``get_support`` and ``scores_`` are provided. The
        default arguments follow the documented defaults of the removed
        estimator. NOTE(review): results are randomised, so they will not
        match the old estimator run-for-run.

        Args:
            C: Inverse regularisation strength passed to each L1 fit.
            scaling: Amount by which a randomly chosen feature is shrunk
                (its column is multiplied by ``1 - scaling``).
            sample_fraction: Expected share of rows drawn for each fit.
            n_resampling: Number of randomised fits.
            selection_threshold: Minimum selection frequency (exclusive)
                for a feature to be reported by ``get_support``.
            random_state: Seed for ``numpy.random.RandomState``.
        """

        def __init__(self, C=1.0, scaling=0.5, sample_fraction=0.75,
                     n_resampling=200, selection_threshold=0.25,
                     random_state=None):
            self.C = C
            self.scaling = scaling
            self.sample_fraction = sample_fraction
            self.n_resampling = n_resampling
            self.selection_threshold = selection_threshold
            self.random_state = random_state

        def fit(self, X, y):
            """Estimate per-feature selection frequencies; return ``self``."""
            rng = np.random.RandomState(self.random_state)
            X = np.asarray(X, dtype=float)
            y = np.asarray(y)

            # Standardise the columns so the L1 penalty treats them alike;
            # constant columns keep a divisor of 1 to avoid dividing by zero.
            spread = X.std(axis=0)
            spread[spread == 0] = 1.0
            X = (X - X.mean(axis=0)) / spread

            n_samples, n_features = X.shape
            hits = np.zeros(n_features)
            rounds = 0
            for _ in range(self.n_resampling):
                rows = rng.rand(n_samples) < self.sample_fraction
                # A subsample holding a single class cannot be fitted; skip it.
                if np.unique(y[rows]).size < 2:
                    continue
                weights = 1.0 - self.scaling * rng.randint(0, 2, size=n_features)
                model = LR(penalty='l1', solver='liblinear', C=self.C)
                model.fit(X[rows] * weights, y[rows])
                hits += np.any(model.coef_ != 0, axis=0)
                rounds += 1

            # max(..., 1) keeps the division defined if every round was skipped.
            self.scores_ = hits / max(rounds, 1)
            return self

        def get_support(self):
            """Return a boolean mask of the features that passed the threshold."""
            return self.scores_ > self.selection_threshold


# Stability selection over the eight candidate features.
rlr = RLR()
rlr.fit(x, y)
support = rlr.get_support()  # computed once and reused below

validate_feature = data.iloc[:, :8]
selected_columns = validate_feature.columns[support]
print(u'有效特征为:%s' % ','.join(map(str, selected_columns)))

# Refit a plain logistic regression using only the selected features.
x = data[selected_columns].values

lr = LR()
lr.fit(x, y)
# score() is evaluated on the same rows the model was trained on, so this is
# training accuracy rather than a held-out estimate.
print(u'模型的平均正确率:%s' % lr.score(x, y))
View Code

Win a contest, win a challenge
原文地址:https://www.cnblogs.com/pandaboy1123/p/9777189.html