统计_多重检验_LSD方法不准确性(python代码实现)

python机器学习-乳腺癌细胞挖掘（博主亲自录制视频）

https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share

机器学习，统计项目联系：QQ231469242

#mental
group1=[2,2,3,4,4,5,3,4,4,4]
#physical
group2=[4,4,3,5,4,1,1,2,3,3]
#medical
group3=[1,2,2,2,3,2,3,1,3,1]

多重检验结果和贾俊平的LSD结果不一样，经过T配对试验，多重检验和T配对试验一致，LSD对小样本可能不准确

# -*- coding: utf-8 -*-

# Import standard packages
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import os
# additional packages
import sys
sys.path.append(os.path.join('..', '..', 'Utilities'))

try:
# Import formatting commands if directory "Utilities" is available
    from ISP_mystyle import showData 
    
except ImportError:
# Ensure correct performance otherwise
    def showData(*options):
        plt.show()
        return
     

# Other required packages
from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                         MultiComparison)
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.libqsturng import psturng


def setData():
    ''' Set up the data, as a structured array. '''
    
    # The first and last field are 32-bit intergers; the second field is an
    # 8-byte string. Note that here we can also give names to the individual
    # fields!
    data = np.rec.array([
    (  1,   'mental',  2 ),
    (  2,   'mental',  2 ),
    (  3,   'mental',  3 ),
    (  4,   'mental',  4 ),
    (  5,   'mental',  4 ),
    (  6,   'mental',  5 ),
    (  7,   'mental',  3 ),
    (  8,   'mental',  4 ),
    (  9,   'mental',  4 ),
    ( 10,   'mental',  4 ),
    ( 11, 'physical',  4 ),
    ( 12, 'physical',  4 ),
    ( 13, 'physical',  3 ),
    ( 14, 'physical',  5 ),
    ( 15, 'physical',  4 ),
    ( 16, 'physical',  1 ),
    ( 17, 'physical',  1 ),
    ( 18, 'physical',  2 ),
    ( 19, 'physical',  3 ),
    ( 20, 'physical',  3 ),
    ( 21,  'medical',  1 ),
    ( 22,  'medical',  2 ),
    ( 23,  'medical',  2 ),
    ( 24,  'medical',  2 ),
    ( 25,  'medical',  3 ),
    ( 26,  'medical',  2 ),
    ( 27,  'medical',  3 ),
    ( 28,  'medical',  1 ),
    ( 29,  'medical',  3 ),
    ( 30,  'medical',  1 )], dtype=[('idx', '<i4'),
                                    ('Treatment', '|S8'),
                                    ('StressReduction', '<i4')])
    return data
   
    
def doAnova(data):
    '''one-way ANOVA'''
    
    df = pd.DataFrame(data)
    model = ols('StressReduction ~ C(Treatment)',df).fit()
    
    anovaResults =  anova_lm(model)
    print(anovaResults)
    if anovaResults['PR(>F)'][0] < 0.05:
        print('One of the groups is different.')

def doTukey(data, multiComp):    
    '''Do a pairwise comparison, and show the confidence intervals'''
    
    print((multiComp.tukeyhsd().summary()))
    
    # Calculate the p-values:
    res2 = pairwise_tukeyhsd(data['StressReduction'], data['Treatment'])
    df = pd.DataFrame(data)
    numData = len(df)
    numTreatments = len(df.Treatment.unique())
    dof = numData - numTreatments
    
    # Show the group names
    print((multiComp.groupsunique))
    
    # Generate a print -------------------
    
    # Get the data
    xvals = np.arange(3)
    res2 = pairwise_tukeyhsd(data['StressReduction'], data['Treatment'])
    errors = np.ravel(np.diff(res2.confint)/2)
    
    # Plot them
    plt.plot(xvals, res2.meandiffs, 'o')
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, fmt='o')
    
    # Put on labels
    pair_labels = multiComp.groupsunique[np.column_stack(res2._multicomp.pairindices)]
    plt.xticks(xvals, pair_labels)
    
    # Format the plot
    xlim = -0.5, 2.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '
 Pairwise Mean Differences')          
    
    # Save to outfile, and show the data
    outFile = 'multComp.png'
    showData(outFile)
    
def Holm_Bonferroni(multiComp):
    ''' Instead of the Tukey's test, we can do pairwise t-test '''
    
    # First, with the "Holm" correction
    rtp = multiComp.allpairtest(stats.ttest_rel, method='Holm')
    print((rtp[0]))
    
    # and then with the Bonferroni correction
    print((multiComp.allpairtest(stats.ttest_rel, method='b')[0]))
    
    # Any value, for testing the program for correct execution
    checkVal = rtp[1][0][0,0]
    return checkVal
    
def main():
    # Get the data
    data = setData()
    
    # Do a one-way ANOVA
    doAnova(data)
    
    #Then, do the multiple testing
    multiComp = MultiComparison(data['StressReduction'], data['Treatment'])
    
    doTukey(data, multiComp)    # Tukey's HSD test
    checkVal = Holm_Bonferroni(multiComp)  # Alternatives to Tukey's HSD test
    
    return checkVal     # this is only for regression testing of the program
    
if __name__ == '__main__':
    main()

多重检验结果和贾俊平的LSD结果不一样，经过T配对试验，多重检验和T配对试验一致，LSD对小样本可能不准确

# -*- coding: utf-8 -*-

import scipy,math
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# additional packages
from statsmodels.stats.diagnostic import lillifors
#多重比较
from statsmodels.sandbox.stats.multicomp import multipletests
#用于排列组合
import itertools
#独立T检验
from scipy.stats import ttest_ind
#配对T检验
from scipy.stats import ttest_rel 


#mental 
group1=[2,2,3,4,4,5,3,4,4,4]
#physical
group2=[4,4,3,5,4,1,1,2,3,3]
#medical
group3=[1,2,2,2,3,2,3,1,3,1]

list_groups=[group1,group2,group3]
list_total=group1+group2+group3
a=0.05
 
#one within group error,also know as random error
def SE(group):
    se=0
    mean1=np.mean(group)
    for i in group:
        error=i-mean1
        se+=error**2
    return se
     
'''
>>> SE(group1)
22.0
>>> SE(group2)
18.0
>>> SE(group3)
14.0
'''
#sum of squares within group error,also know as random error   
def SSE(list_groups): 
    sse=0
    for group in list_groups:
        se=SE(group)
        sse+=se
    return sse
         
     
#误差总平方和   
def SST(list_total):
    sst=0
    mean1=np.mean(list_total)
    for i in list_total:
        error=i-mean1
        sst+=error**2
    return sst
     
     
#SSA,between-group sum of squares 组间平方和,公式1：ssa=sst-sse
def SSA(list_groups,list_total):
    sse=SSE(list_groups)
    sst=SST(list_total)
    ssa=sst-sse
    return ssa
     
     
#SSA,between-group sum of squares 组间平方和
def SSA1(list_groups,list_total):
    mean_total=np.mean(list_total)
    ssa=0
    for group in list_groups:
        group_mean=np.mean(group)
        distance=(mean_total-group_mean)**2
        ssa+=distance
     
    ssa=ssa*5
    return ssa       
     
#处理效应均方   
def MSA(list_groups,list_total):
    ssa=SSA(list_groups,list_total)
    msa=ssa/(len(list_groups)-1)*1.0
    return msa
     
# 误差均方
def MSE(list_groups,list_total):
    sse=SSE(list_groups)
    mse=sse/(len(list_total)-1*len(list_groups))*1.0 
    return mse             
     
#F score
def F(list_groups,list_total):
    msa=MSA(list_groups,list_total)
    mse=MSE(list_groups,list_total)
    ratio=msa/mse*1.0
    return ratio
'''
>>> F(list_groups,list_total)
22.592592592592595
'''
   
#LSD检验有问题，需要核对，不如配对T检验准确 
#LSD(least significant difference)最小显著差异
def LSD(list_groups,list_total,sample1,sample2):
    mean1=np.mean(sample1)
    mean2=np.mean(sample2)
    distance=abs(mean1-mean2)
    print"distance:",distance
    #t检验的自由度
    df=len(list_total)-1*len(list_groups)
    mse=MSE(list_groups,list_total)
    print"MSE:",mse
    t_value=stats.t(df).isf(a/2)
    print"t value:",t_value
    lsd=t_value*math.sqrt(mse*(1.0/len(sample1)+1.0/len(sample2)))
    print "LSD:",lsd
    if distance<lsd:
        print"no significant difference between:",sample1,sample2
    else:
        print"there is significant difference between:",sample1,sample2
     
  
#正态分布测试
def check_normality(testData):
     
    #20<样本数<50用normal test算法检验正态分布性
    if 20<len(testData) <50:
       p_value= stats.normaltest(testData)[1]
       if p_value<0.05:
           print"use normaltest"
           print "data are not normal distributed"
           return  False
       else:
           print"use normaltest"
           print "data are normal distributed"
           return True
     
    #样本数小于50用Shapiro-Wilk算法检验正态分布性
    if len(testData) <50:
       p_value= stats.shapiro(testData)[1]
       if p_value<0.05:
           print "use shapiro:"
           print "data are not normal distributed"
           return  False
       else:
           print "use shapiro:"
           print "data are normal distributed"
           return True
       
    if 300>=len(testData) >=50:
       p_value= lillifors(testData)[1]
       if p_value<0.05:
           print "use lillifors:"
           print "data are not normal distributed"
           return  False
       else:
           print "use lillifors:"
           print "data are normal distributed"
           return True
     
    if len(testData) >300: 
       p_value= stats.kstest(testData,'norm')[1]
       if p_value<0.05:
           print "use kstest:"
           print "data are not normal distributed"
           return  False
       else:
           print "use kstest:"
           print "data are normal distributed"
           return True
 
 
#对所有样本组进行正态性检验
def NormalTest(list_groups):
    for group in list_groups:
        #正态性检验
        status=check_normality(group1)
        if status==False :
            return False
             
#排列组合函数
def Combination(list_groups):
    combination= []
    for i in range(1,len(list_groups)+1):
        iter = itertools.combinations(list_groups,i)
        combination.append(list(iter))
    #需要排除第一个和最后一个
    return combination[1:-1][0]
'''
Out[57]:
[[([2, 3, 7, 2, 6], [10, 8, 7, 5, 10]),
  ([2, 3, 7, 2, 6], [10, 13, 14, 13, 15]),
  ([10, 8, 7, 5, 10], [10, 13, 14, 13, 15])]]
'''       
             
#多重比较           
def Multiple_test(list_groups):
    
    combination=Combination(list_groups)
    for pair in combination:
        LSD(list_groups,list_total,pair[0],pair[1])
         
                 
                             
#discriptive statistcs
print "discriptive statistcs----------------------------------------------"
print "group1 mean",np.mean(group1)
print "group2 mean",np.mean(group2)
print "group3 mean",np.mean(group3)  
                                                 
     
#对所有样本组进行正态性检验 
print"M=Normality test:-----------------------------------" 
NormalTest(list_groups)
 
#方差齐性检测
print"levene test:-----------------------------------"
leveneResult=scipy.stats.levene(group1,group2,group3)
leveneScore=leveneResult[0]
p_levene=leveneResult[1]
if p_levene<0.05:
   print"levene test is not fit,be attention!"
else:
   print"levene test is ok"
'''
H0成立，三组数据方差无显著差异
Out[9]: LeveneResult(statistic=0.24561403508771934, pvalue=0.7860617221429711)
'''
 
print "result--------------------------------------------------"
f_score=F(list_groups,list_total)
print"F score:",f_score
#sf 为生存函数survival function
#组数自由度
df1=len(list_groups)-1
#所有样本的自由度
df2=len(list_total)-1*len(list_groups)
probability=f.sf(f_score,df1,df2)
print"p value:",probability
'''
Out[28]: 8.5385924542746692e-05
''' 
if probability<0.05:
    print"there is significance,H1 win"
else:
   print"there is no significance,H0 win"
 
#多重比较
print"multiple test----------------------------------------------"
print"Multiple test",Multiple_test
Multiple_test(list_groups)

贾俊平书里的例子，综合多重检验和LSD方法结果一致

#居民区
group1=[265,310,220,290,350,300,445,480,500,430,428,530]
#商业区
group2=[410,305,450,380,310,390,590,480,510,470,415,390]
#写字楼
group3=[180,290,330,220,170,256,290,283,260,246,275,320]

# -*- coding: utf-8 -*-

# Import standard packages
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import os
# additional packages
import sys
sys.path.append(os.path.join('..', '..', 'Utilities'))

try:
# Import formatting commands if directory "Utilities" is available
    from ISP_mystyle import showData 
    
except ImportError:
# Ensure correct performance otherwise
    def showData(*options):
        plt.show()
        return
     

# Other required packages
from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                         MultiComparison)
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.libqsturng import psturng


def setData():
    ''' Set up the data, as a structured array. '''
    
    # The first and last field are 32-bit intergers; the second field is an
    # 8-byte string. Note that here we can also give names to the individual
    # fields!
    data = np.rec.array([
    (  1,   'resident', 265 ),
    (  2,   'resident', 310 ),
    (  3,   'resident', 220 ),
    (  4,   'resident', 290 ),
    (  5,   'resident', 350 ),
    (  6,   'resident', 300 ),
    (  7,   'resident', 445 ),
    (  8,   'resident', 480 ),
    (  9,   'resident', 500 ),
    ( 10,   'resident', 430),
    ( 11,   'resident', 428),
    ( 12,   'resident', 530 ),
    ( 13, 'commercial', 410 ),
    ( 14, 'commercial', 305 ),
    ( 15, 'commercial', 450 ),
    ( 16, 'commercial', 380 ),
    ( 17, 'commercial', 310 ),
    ( 18, 'commercial', 390 ),
    ( 19, 'commercial', 590 ),
    ( 20, 'commercial', 480 ),
    ( 21, 'commercial', 510 ),
    ( 22, 'commercial', 470 ),
    ( 23, 'commercial', 415 ),
    ( 24, 'commercial', 390 ),
    ( 25, 'officeBuilding', 180 ),
    ( 26, 'officeBuilding', 290 ),
    ( 27, 'officeBuilding', 330 ),
    ( 28, 'officeBuilding', 220 ),
    ( 29, 'officeBuilding', 170),
    ( 30, 'officeBuilding', 256),
    ( 31, 'officeBuilding', 290 ),
    ( 32, 'officeBuilding', 283 ),
    ( 33, 'officeBuilding', 260),
    ( 34, 'officeBuilding', 246),
    ( 35, 'officeBuilding', 275),
    ( 36, 'officeBuilding', 320),
     ], dtype=[('idx', '<i4'),
                                    ('Treatment', '|S8'),
                                    ('StressReduction', '<i4')])
    return data

   
    
def doAnova(data):
    '''one-way ANOVA'''
    
    df = pd.DataFrame(data)
    model = ols('StressReduction ~ C(Treatment)',df).fit()
    
    anovaResults =  anova_lm(model)
    print(anovaResults)
    if anovaResults['PR(>F)'][0] < 0.05:
        print('One of the groups is different.')

def doTukey(data, multiComp):    
    '''Do a pairwise comparison, and show the confidence intervals'''
    
    print((multiComp.tukeyhsd().summary()))
    
    # Calculate the p-values:
    res2 = pairwise_tukeyhsd(data['StressReduction'], data['Treatment'])
    df = pd.DataFrame(data)
    numData = len(df)
    numTreatments = len(df.Treatment.unique())
    dof = numData - numTreatments
    
    # Show the group names
    print((multiComp.groupsunique))
    
    # Generate a print -------------------
    
    # Get the data
    xvals = np.arange(3)
    res2 = pairwise_tukeyhsd(data['StressReduction'], data['Treatment'])
    errors = np.ravel(np.diff(res2.confint)/2)
    
    # Plot them
    plt.plot(xvals, res2.meandiffs, 'o')
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, fmt='o')
    
    # Put on labels
    pair_labels = multiComp.groupsunique[np.column_stack(res2._multicomp.pairindices)]
    plt.xticks(xvals, pair_labels)
    
    # Format the plot
    xlim = -0.5, 2.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '
 Pairwise Mean Differences')          
    
    # Save to outfile, and show the data
    outFile = 'multComp.png'
    showData(outFile)
    
def Holm_Bonferroni(multiComp):
    ''' Instead of the Tukey's test, we can do pairwise t-test '''
    
    # First, with the "Holm" correction
    rtp = multiComp.allpairtest(stats.ttest_rel, method='Holm')
    print((rtp[0]))
    
    # and then with the Bonferroni correction
    print((multiComp.allpairtest(stats.ttest_rel, method='b')[0]))
    
    # Any value, for testing the program for correct execution
    checkVal = rtp[1][0][0,0]
    return checkVal
    
def main():
    # Get the data
    data = setData()
    
    # Do a one-way ANOVA
    doAnova(data)
    
    #Then, do the multiple testing
    multiComp = MultiComparison(data['StressReduction'], data['Treatment'])
    
    doTukey(data, multiComp)    # Tukey's HSD test
    checkVal = Holm_Bonferroni(multiComp)  # Alternatives to Tukey's HSD test
    
    return checkVal     # this is only for regression testing of the program
    
if __name__ == '__main__':
    main()

贾俊平书里的例子，综合多重检验和LSD方法结果一致

# -*- coding: utf-8 -*-

import scipy,math
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# additional packages
from statsmodels.stats.diagnostic import lillifors
#多重比较
from statsmodels.sandbox.stats.multicomp import multipletests
#用于排列组合
import itertools
#独立T检验
from scipy.stats import ttest_ind
#配对T检验
from scipy.stats import ttest_rel 


#居民区 
group1=[265,310,220,290,350,300,445,480,500,430,428,530]
#商业区
group2=[410,305,450,380,310,390,590,480,510,470,415,390]
#写字楼
group3=[180,290,330,220,170,256,290,283,260,246,275,320]

list_groups=[group1,group2,group3]
list_total=group1+group2+group3
a=0.05
 
#one within group error,also know as random error
def SE(group):
    se=0
    mean1=np.mean(group)
    for i in group:
        error=i-mean1
        se+=error**2
    return se
     
'''
>>> SE(group1)
22.0
>>> SE(group2)
18.0
>>> SE(group3)
14.0
'''
#sum of squares within group error,also know as random error   
def SSE(list_groups): 
    sse=0
    for group in list_groups:
        se=SE(group)
        sse+=se
    return sse
         
     
#误差总平方和   
def SST(list_total):
    sst=0
    mean1=np.mean(list_total)
    for i in list_total:
        error=i-mean1
        sst+=error**2
    return sst
     
     
#SSA,between-group sum of squares 组间平方和,公式1：ssa=sst-sse
def SSA(list_groups,list_total):
    sse=SSE(list_groups)
    sst=SST(list_total)
    ssa=sst-sse
    return ssa
     
     
#SSA,between-group sum of squares 组间平方和
def SSA1(list_groups,list_total):
    mean_total=np.mean(list_total)
    ssa=0
    for group in list_groups:
        group_mean=np.mean(group)
        distance=(mean_total-group_mean)**2
        ssa+=distance
     
    ssa=ssa*5
    return ssa       
     
#处理效应均方   
def MSA(list_groups,list_total):
    ssa=SSA(list_groups,list_total)
    msa=ssa/(len(list_groups)-1)*1.0
    return msa
     
# 误差均方
def MSE(list_groups,list_total):
    sse=SSE(list_groups)
    mse=sse/(len(list_total)-1*len(list_groups))*1.0 
    return mse             
     
#F score
def F(list_groups,list_total):
    msa=MSA(list_groups,list_total)
    mse=MSE(list_groups,list_total)
    ratio=msa/mse*1.0
    return ratio
'''
>>> F(list_groups,list_total)
22.592592592592595
'''
   
#LSD检验有问题，需要核对，不如配对T检验准确 
#LSD(least significant difference)最小显著差异
def LSD(list_groups,list_total,sample1,sample2):
    mean1=np.mean(sample1)
    mean2=np.mean(sample2)
    distance=abs(mean1-mean2)
    print"distance:",distance
    #t检验的自由度
    df=len(list_total)-1*len(list_groups)
    mse=MSE(list_groups,list_total)
    print"MSE:",mse
    t_value=stats.t(df).isf(a/2)
    print"t value:",t_value
    lsd=t_value*math.sqrt(mse*(1.0/len(sample1)+1.0/len(sample2)))
    print "LSD:",lsd
    if distance<lsd:
        print"no significant difference between:",sample1,sample2
    else:
        print"there is significant difference between:",sample1,sample2
     
  
#正态分布测试
def check_normality(testData):
     
    #20<样本数<50用normal test算法检验正态分布性
    if 20<len(testData) <50:
       p_value= stats.normaltest(testData)[1]
       if p_value<0.05:
           print"use normaltest"
           print "data are not normal distributed"
           return  False
       else:
           print"use normaltest"
           print "data are normal distributed"
           return True
     
    #样本数小于50用Shapiro-Wilk算法检验正态分布性
    if len(testData) <50:
       p_value= stats.shapiro(testData)[1]
       if p_value<0.05:
           print "use shapiro:"
           print "data are not normal distributed"
           return  False
       else:
           print "use shapiro:"
           print "data are normal distributed"
           return True
       
    if 300>=len(testData) >=50:
       p_value= lillifors(testData)[1]
       if p_value<0.05:
           print "use lillifors:"
           print "data are not normal distributed"
           return  False
       else:
           print "use lillifors:"
           print "data are normal distributed"
           return True
     
    if len(testData) >300: 
       p_value= stats.kstest(testData,'norm')[1]
       if p_value<0.05:
           print "use kstest:"
           print "data are not normal distributed"
           return  False
       else:
           print "use kstest:"
           print "data are normal distributed"
           return True
 
 
#对所有样本组进行正态性检验
def NormalTest(list_groups):
    for group in list_groups:
        #正态性检验
        status=check_normality(group1)
        if status==False :
            return False
             
#排列组合函数
def Combination(list_groups):
    combination= []
    for i in range(1,len(list_groups)+1):
        iter = itertools.combinations(list_groups,i)
        combination.append(list(iter))
    #需要排除第一个和最后一个
    return combination[1:-1][0]
'''
Out[57]:
[[([2, 3, 7, 2, 6], [10, 8, 7, 5, 10]),
  ([2, 3, 7, 2, 6], [10, 13, 14, 13, 15]),
  ([10, 8, 7, 5, 10], [10, 13, 14, 13, 15])]]
'''       
             
#多重比较           
def Multiple_test(list_groups):
    
    combination=Combination(list_groups)
    for pair in combination:
        LSD(list_groups,list_total,pair[0],pair[1])
         
                 
                             
#discriptive statistcs
print "discriptive statistcs----------------------------------------------"
print "group1 mean",np.mean(group1)
print "group2 mean",np.mean(group2)
print "group3 mean",np.mean(group3)  
                                                 
     
#对所有样本组进行正态性检验 
print"M=Normality test:-----------------------------------" 
NormalTest(list_groups)
 
#方差齐性检测
print"levene test:-----------------------------------"
leveneResult=scipy.stats.levene(group1,group2,group3)
leveneScore=leveneResult[0]
p_levene=leveneResult[1]
if p_levene<0.05:
   print"levene test is not fit,be attention!"
else:
   print"levene test is ok"
'''
H0成立，三组数据方差无显著差异
Out[9]: LeveneResult(statistic=0.24561403508771934, pvalue=0.7860617221429711)
'''
 
print "result--------------------------------------------------"
f_score=F(list_groups,list_total)
print"F score:",f_score
#sf 为生存函数survival function
#组数自由度
df1=len(list_groups)-1
#所有样本的自由度
df2=len(list_total)-1*len(list_groups)
probability=f.sf(f_score,df1,df2)
print"p value:",probability
'''
Out[28]: 8.5385924542746692e-05
''' 
if probability<0.05:
    print"there is significance,H1 win"
else:
   print"there is no significance,H0 win"
 
#多重比较
print"multiple test----------------------------------------------"
print"Multiple test",Multiple_test
Multiple_test(list_groups)

python风控评分卡建模和风控常识(博客主亲自录制视频教程)

https://study.163.com/course/introduction.htm?courseId=1005214003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share