使用 Python 进行数据分析——相关分析

相关分析

import statsmodels.api as sm
import pandas as pd
import numpy as np
from patsy.highlevel import dmatrices # 这个是线性回归的
from common.util.my_sqlalchemy import sqlalchemy_engine
import math

from scipy.stats.stats import pearsonr


# Load the four questionnaire columns from the survey table into a DataFrame.
sql = "select Q1R3, Q1R5, Q1R6, Q1R7 from db2017091115412316222027656281_1;"
df = pd.read_sql(sql=sql, con=sqlalchemy_engine)

# Keep only the rows in which every selected column has a value.
df_dropna = df.dropna(axis=0, how="any")

# pearsonr returns the correlation coefficient together with its p-value.
result = pearsonr(*(df_dropna[column] for column in ("Q1R3", "Q1R5")))
print(result)

报告展示:

相关性检验显示,rkzzlgmsr显著负相关(Pearson's r=-0.529,p<0.05)。

若 p>0.05 则写:rkzzlgmsr无显著相关关系(Pearson's r=-0.529,p>0.05)。

 


Pearson's r    p
-0.5292        0.0425

 

 

 

 

    A   B   C
A   AA  AB  AC
B   AB  BB  BC
C   AC  BC  CC

二期

经过数据分析部指导,系数做了算法优化

def CorrelationAnalysisDetail(UserID, ProjID, QuesID, VariableNames, CasesCondition, VariableIDs, Corr):
    """Return the correlation matrix of the selected variables as a dict.

    The data table is looked up from the first entry of ``VariableIDs``; the
    analysis data is then loaded through ``CorrelationAnalysisModel`` and its
    correlation matrix is computed with ``DataFrame.corr``.

    Args:
        UserID: Forwarded to ``SelectDatatableIDTwoSql``.
        ProjID: Forwarded to ``SelectDatatableIDTwoSql``.
        QuesID: Forwarded to ``SelectDatatableIDTwoSql``.
        VariableNames: Forwarded to ``CorrelationAnalysisModel``
            (presumably the column names to correlate -- confirm against
            that helper).
        CasesCondition: Forwarded to ``CorrelationAnalysisModel``
            (presumably a row filter -- confirm against that helper).
        VariableIDs: Sequence of variable ids; only ``VariableIDs[0]`` is
            used, for the data-table lookup.
        Corr: Value passed as ``method`` to ``DataFrame.corr``:
            ``"pearson"``, ``"kendall"`` or ``"spearman"``.

    Returns:
        The result of ``DataFrame.corr(method=Corr).to_dict()``, or ``None``
        when the data-table lookup returns nothing.
    """
    select_id_ret = select_ques_datatableid_optionid()
    try:
        whether_datatableid = select_id_ret.SelectDatatableIDTwoSql(
            UserID, ProjID, QuesID, VariableIDs[0]
        )
    finally:
        # Always close the lookup object, even when the query (or the
        # VariableIDs[0] access) raises, so it is never left open.
        select_id_ret.close()

    if not whether_datatableid:
        return None

    DatabaseName = whether_datatableid[0]["DatabaseName"]
    TableName = JoinTableName(whether_datatableid)
    # NOTE(review): the original name ``df_dropna`` suggests the helper returns
    # a DataFrame with missing rows already removed -- confirm in the helper.
    df_dropna = CorrelationAnalysisModel(VariableNames, TableName, DatabaseName, CasesCondition)

    # One corr() call yields the full pairwise matrix for the chosen method.
    return df_dropna.corr(method=Corr).to_dict()
原文地址:https://www.cnblogs.com/renfanzi/p/7600334.html