小马哥课堂-统计学-置信区间(2)

 1 #!/usr/bin/env python3
 2 #-*- coding:utf-8 -*-
 3 #############################################
 4 #File Name: ci_ex1.py
 5 #Brief: 示例1
 6 #Author: frank
 7 #Email: frank0903@aliyun.com
 8 #Created Time:2018-08-13 19:33:11
 9 #Blog: http://www.cnblogs.com/black-mamba
10 #Github: https://github.com/xiaomagejunfu0903/statistic_notes
11 #############################################
12 import matplotlib.pyplot as plt
13 import numpy as np
14 import scipy.stats
15 import random
16 
17 #随机抽取36个苹果,计算200,000苹果的重量在100到124g范围的概率
18 def sampling_distribution(data,confidence=0.95,sampling_times=1,sampling_size=30):
19     list_sampling_mean = []
20     
21     for i in np.arange(sampling_times):
22         #samples = np.random.choice(data,sampling_size)#重复抽样
23         samples = random.sample(list(data),sampling_size)#不重复抽样
24         #print("samples:{}".format(samples))
25         samples_mean = np.mean(samples)
26         list_sampling_mean.append(samples_mean)
27         
28     
29     #the sampling distribution of the sampling mean,样本均值的抽样分布
30     sam_mean = np.mean(list_sampling_mean)
31     print("样本均值分布的期望,sam_mean:{}".format(sam_mean))
32     sam_std = np.std(list_sampling_mean)
33     print("样本均值分布的标准差,sam_std:{}".format(sam_std))
34     
35     #获取样本均值分布~N(sam_mean, sam_std^2)
36     norm = scipy.stats.norm(loc=sam_mean, scale=sam_std)
37     
38     #获取距离sam_mean 4个sam_std的范围
39     x = np.arange(sam_mean-sam_std*4, sam_mean+sam_std*4, 1)
40     #获取x对应的概率密度函数值
41     y = norm.pdf(x)
42     #显示样本均值分布
43     plt.plot(x, y,'b--',alpha=0.7,label='sample distribution of the sample mean')
44 
45     #获取置信区间,方法1
46     confidence *= 100
47     c_low = (100 - confidence) / 2
48     c_high = 100 - c_low
49     c_interval = np.percentile(list_sampling_mean,[c_low, c_high])
50     print("95%置信区间端点对应的x坐标:{}".format(c_interval))
51     
52 
53     #获取置信区间,方法2
54     c_interval1 = [norm.isf(1-0.025),norm.isf(1-0.975)]
55     print("95%置信区间端点对应的x坐标:{}".format(c_interval1))
56     print("[{},{}]".format(sam_mean-sam_std*1.8, sam_mean+sam_std*1.8))
57 
58     #绘制置信区间对应的竖线
59     a = c_interval[0]
60     b = c_interval[1]
61     plt.vlines(a, 0, norm.pdf(a),'r')
62     plt.vlines(b, 0, norm.pdf(b),'r')
63     
64     a1 = c_interval1[0]
65     b1 = c_interval1[1]
66     plt.vlines(a1, 0, norm.pdf(a1),'g',alpha=0.5)
67     plt.vlines(b1, 0, norm.pdf(b1),'g',alpha=0.5)
68     
69     fill_x = np.linspace(a1,b1,100)
70     #print("type of fill_x:{}".format(type(fill_x)))
71     fill_y = norm.pdf(fill_x)
72     #print("type of fill_y:{}".format(type(fill_y)))
73     plt.fill_between(fill_x, fill_y, color='b',alpha=0.5)
74 
75 
76 #模拟总体200,000个苹果的重量数据
77 #用不同的数据测试
78 #data1 = np.random.randint(10, 200, size=200000)
79 data1 = np.random.randint(50, 200, size=200000)
80 
81 #总体均值
82 population_mean = np.mean(data1)
83 print("population mean:{}".format(population_mean))
84 
85 #总体标准差
86 population_std = np.std(data1)
87 print("population std:{}".format(population_std))
88 
89 #总体分布曲线
90 norm1 = scipy.stats.norm(population_mean, population_std)
91 x = np.arange(population_mean-population_std*3.5, population_mean+population_std*3.5, 1)
92 y = norm1.pdf(x)
93 plt.plot(x, y,'r--',label='population distribution',alpha=0.7)
94 
95 
96 sampling_distribution(data1,sampling_times=1000,sampling_size=36) 
97 
98 plt.legend()
99 plt.show() 
ci_ex1.py 演示了小马哥课堂-统计学-置信区间 的示例1

 1 #!/usr/bin/env python3
 2 #-*- coding:utf-8 -*-
 3 #############################################
 4 #File Name: ci_ex2.py
 5 #Brief: 演示示例2
 6 #Author: frank
 7 #Email: frank0903@aliyun.com
 8 #Created Time:2018-08-13 20:14:44
 9 #Blog: http://www.cnblogs.com/black-mamba
10 #Github: https://github.com/xiaomagejunfu0903/statistic_notes
11 #############################################
12 import numpy as np
13 import matplotlib.pyplot as plt
14 import matplotlib as mpl
15 import scipy.stats
16 
17 #设置中文字体
18 zhfont = mpl.font_manager.FontProperties(fname='/usr/share/fonts/truetype/wqy/wqy-microhei.ttc')
19 
20 #95%置信水平的标准正态分布的置信区间演示
21 norm = scipy.stats.norm()
22 #x轴
23 x = np.linspace(norm.ppf(0.001),norm.ppf(0.999), 10000)
24 #y轴
25 y= norm.pdf(x)
26 #显示标准正态分布曲线
27 plt.plot(x,y)
28 
29 #画置信区间的界限
30 plt.vlines(-1.96,ymin=0,ymax=norm.pdf(-1.96),colors='r', linestyles='dashed')
31 plt.vlines(1.96,ymin=0,ymax=norm.pdf(1.96),colors='r', linestyles='dashed')
32 
33 #显示 置信区间
34 fill_x = np.linspace(-1.96,1.96,100)
35 #print("type of fill_x:{}".format(type(fill_x)))
36 fill_y = norm.pdf(fill_x)
37 #print("type of fill_y:{}".format(type(fill_y)))
38 plt.fill_between(fill_x, fill_y, color='b',alpha=0.5)
39 
40 #已知置信水平,求置信区间
41 
42 #方法1:不太准确
43 confidence = 95
44 c_low = (100-confidence)/2
45 c_high = 100-c_low
46 c_interval = np.percentile(x,[c_low, c_high])
47 print(c_interval)
48 plt.vlines(c_interval[0] ,ymin=0,ymax=0.3,colors='purple', linestyles='dashed')
49 plt.vlines(c_interval[1] ,ymin=0,ymax=0.3,colors='purple', linestyles='dashed')
50 
51 #方法2:更好的方式
52 c_interval = [norm.isf(1-0.025),norm.isf(1-0.975)]
53 print(c_interval)
54 plt.vlines(c_interval[0] ,ymin=0,ymax=norm.pdf(-1.96),colors='r', linestyles='dashed',alpha=0.5)
55 plt.vlines(c_interval[1] ,ymin=0,ymax=norm.pdf(1.96),colors='r', linestyles='dashed',alpha=0.5)
56 
57 plt.title('标准正态分布的95%置信区间',fontproperties=zhfont)
58 plt.savefig("ci_ex2.jpg")
59 #print("pdf(1.96):{}".format(norm.pdf(1.96)))
60 #print("cdf(1.96):{}".format(norm.cdf(1.96)))
61 #print("sf(1.96):{}".format(norm.sf(1.96)))
62 #print("isf(0.025):{}".format(norm.isf(0.025)))
63 plt.show()
ci_ex2.py 演示了小马哥课堂-统计学-置信区间 的示例2


原文地址:https://www.cnblogs.com/black-mamba/p/9472824.html