# 机器学习——圆形线性回归

# 机器学习——圆形线性回归——注释就是笔记

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


def f(x):
    """Return both X2 roots of the quadratic decision boundary at X1 = x.

    The boundary is the set of points where
        theta0 + theta1*X1 + theta2*X2 + theta3*X1^2 + theta4*X2^2 + theta5*X1*X2 = 0.
    For a fixed X1 = x this is a quadratic a*X2^2 + b*X2 + c = 0 in X2, which
    is solved with the quadratic formula.

    The coefficients theta0..theta5 are read from module-level globals at call
    time; they must be assigned before f is called.

    Args:
        x: X1 value (scalar or NumPy array) at which to evaluate the boundary.

    Returns:
        Tuple (upper_root, lower_root): the '+' and '-' branches of the
        quadratic formula. Where the discriminant is negative np.sqrt yields
        NaN, i.e. the boundary does not exist at that x.
    """
    a = theta4
    # Use the argument x here, not the global sample vector, so that the
    # result describes the boundary at exactly this x.
    b = theta5 * x + theta2
    c = theta0 + theta1 * x + theta3 * x * x
    # Compute the square root of the discriminant once for both branches.
    root = np.sqrt(b * b - 4 * a * c)
    X2_new_boundary1 = (-b + root) / (2 * a)
    X2_new_boundary2 = (-b - root) / (2 * a)
    return X2_new_boundary1, X2_new_boundary2


def _plot_labelled_scatter(data, mask, curves=()):
    """Scatter-plot example1 vs example2 split by label and overlay curves.

    Args:
        data: DataFrame containing the 'example1' and 'example2' columns.
        mask: Boolean Series; rows where it is True are drawn as 'passed',
            the remaining rows as 'failed'.
        curves: Iterable of (x_values, y_values) pairs, each drawn as a line
            on top of the scatter plot.
    """
    plt.figure()
    passed = plt.scatter(data.loc[:, 'example1'][mask], data.loc[:, 'example2'][mask])
    failed = plt.scatter(data.loc[:, 'example1'][~mask], data.loc[:, 'example2'][~mask])
    for xs, ys in curves:
        plt.plot(xs, ys)
    plt.title('example1-example2')  # figure title
    plt.xlabel('example1')  # x-axis label
    plt.ylabel('example2')  # y-axis label
    plt.legend((passed, failed), ('passed', 'failed'))
    plt.show()


if __name__ == '__main__':
    # Logistic regression with second-order features.

    # Load the data.
    # TODO(review): the CSV path is empty in the original source; point it at
    # the dataset (it must provide 'example1', 'example2' and 'pass' columns).
    data = pd.read_csv('')
    print(data.head())

    # First look: all samples, without labels.
    plt.figure()
    plt.scatter(data.loc[:, 'example1'], data.loc[:, 'example2'])
    plt.title('example1-example2')  # figure title
    plt.xlabel('example1')  # x-axis label
    plt.ylabel('example2')  # y-axis label
    plt.show()

    # Second look: the same samples, coloured by the pass/fail label.
    mask = data.loc[:, 'pass'] == 1
    _plot_labelled_scatter(data, mask)

    # Define the target and the raw features.
    y = data.loc[:, 'pass']
    print(y.head())  # inspect the labels
    X1 = data.loc[:, 'example1']
    X2 = data.loc[:, 'example2']

    # Build the second-order feature set: X1, X2, X1^2, X2^2, X1*X2.
    X1_2 = X1 * X1
    X2_2 = X2 * X2
    X1_X2 = X1 * X2
    X_new = pd.DataFrame({'X1': X1, 'X2': X2, 'X1_2': X1_2, 'X2_2': X2_2, 'X1_X2': X1_X2})
    print(X_new)

    # Train on the second-order features and report the training accuracy.
    LR2 = LogisticRegression()
    LR2.fit(X_new, y)
    y2_predict = LR2.predict(X_new)
    accuracy2 = accuracy_score(y, y2_predict)
    print(accuracy2)

    X1_new = X1.sort_values()  # ascending, so the boundary is drawn left to right
    # intercept_ is a length-1 array; take the scalar so later arithmetic
    # yields plain numbers instead of length-1 arrays.
    theta0 = LR2.intercept_[0]
    # coef_[0] follows the column order of X_new: X1, X2, X1^2, X2^2, X1*X2.
    theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0]

    # Boundary as a quadratic in X2 (a*X2^2 + b*X2 + c = 0), '+' branch only.
    a = theta4
    b = theta5 * X1_new + theta2
    c = theta0 + theta1 * X1_new + theta3 * X1_new * X1_new
    X2_new_boundary = (-b + np.sqrt(b * b - 4 * a * c)) / (2 * a)

    _plot_labelled_scatter(data, mask, [(X1_new, X2_new_boundary)])
    # With only one root of the quadratic, just half of the boundary is drawn:
    # the second solution for X2 is ignored. The next step adds the curve for
    # that second solution as well.

    # Correct approach: use f(x), which returns both roots.
    roots = [f(x) for x in X1_new]
    X2_new_boundary1 = [pair[0] for pair in roots]
    X2_new_boundary2 = [pair[1] for pair in roots]
    print(X2_new_boundary1, X2_new_boundary2)

    _plot_labelled_scatter(
        data, mask,
        [(X1_new, X2_new_boundary1), (X1_new, X2_new_boundary2)],
    )
    # Both branches are now drawn, but the two curves still do not join up:
    # the sample X1 values are sparse, so points near the ends of the boundary
    # are missing. Evaluating on a dense X1 grid fills in those gaps.

    X1_range = np.array([-0.9 + x / 10000 for x in range(0, 19000)])
    # Evaluate on the dense grid itself so that the x and y sequences passed
    # to plt.plot have the same length.
    roots = [f(x) for x in X1_range]
    X2_new_boundary1 = [pair[0] for pair in roots]
    X2_new_boundary2 = [pair[1] for pair in roots]

    _plot_labelled_scatter(
        data, mask,
        [(X1_range, X2_new_boundary1), (X1_range, X2_new_boundary2)],
    )